ebook_tools 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +6 -0
- data/README +5 -1
- data/bin/doc_book_import_mongo +73 -0
- data/bin/xml2json +18 -0
- data/ebook_tools.gemspec +6 -2
- data/lib/doc_book_in_mongo.rb +21 -0
- data/lib/paras_in_mongo.rb +4 -7
- metadata +6 -2
data/CHANGELOG
CHANGED
data/README
CHANGED
@@ -0,0 +1,73 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
require 'optparse'
|
6
|
+
require File.join(File.expand_path('../../',__FILE__),'lib','utils')
|
7
|
+
require File.join(File.expand_path('../../',__FILE__),'lib','doc_book_in_mongo')
|
8
|
+
|
9
|
+
# Prints the usage text for doc_book_import_mongo to standard output and
# then terminates the process by calling +exit+.
def help
  usage = <<-EOF
usage:
doc_book_import_mongo [options] <docbook>

docbook: 已经标注重点段落的书文件或目录

options:
-H <host> , --host <host> : mongodb服务器,默认为localhost
-P <port> , --port <port> : mongodb服务器端口号,默认为27017 (Mongo默认端口号)
-D <database>, --database <database> : doc_book要存放的数据库
-C <collection>, --collection <collection> : doc_book存放的集合
  EOF
  puts usage
  exit
end
|
24
|
+
|
25
|
+
|
26
|
+
# Command-line entry point: parse the connection switches, turn the
# <docbook> argument into a list of files and import each one through
# DocBookInMongo.file_in_mongo.

# Values used when the corresponding switch is not supplied.
options = {:host=>'localhost',:port=>27017,:database=>'resource_development',:collection=>'doc_books'}

parser = OptionParser.new do |o|
  o.on('-H host','--host host') { |host| options[:host] = host }
  o.on('-P port','--port port') { |port| options[:port] = port.to_i }
  o.on('-D database','--database database') { |database| options[:database] = database }
  o.on('-C collection','--collection collection') { |collection| options[:collection] = collection }
  o.on('-h','--help') { help }
end

# OptionParser#parse leaves ARGV untouched and returns only the non-option
# arguments. Reading the docbook from that list (instead of ARGV[-1])
# prevents an option value such as the host name from being mistaken for
# the docbook path when no docbook was given.
remaining = parser.parse(ARGV)
docbook = remaining.last

# help prints the usage text and exits, so nothing below runs without a path.
help if docbook.nil?

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
unless File.exist?(docbook)
  raise "错误:指定的docbook不存在。"
end

files = if File.directory?(docbook)
  Utils.scan_file_from_dir(docbook,{:format=>'.xml'})
else
  [docbook]
end

# Keep going after a failed file, but remember that something went wrong so
# the final status reflects it.
failures = 0
files.each do |file|
  begin
    DocBookInMongo.file_in_mongo(file,options)
  rescue => e
    failures += 1
    puts "error: #{file} import mongo failure!"
    # Surface the cause on stderr instead of discarding it.
    warn "  #{e.class}: #{e.message}"
  end
end

if failures.zero?
  puts "success: #{docbook} in mongo successfully!"
else
  # Do not report success when at least one file could not be imported.
  exit 1
end
|
data/bin/xml2json
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#! /usr/local/bin/node
|
2
|
+
// Reads the XML file named by the first command-line argument, converts it
// with the xml2json module and writes the resulting JSON to stdout.
var filename = process.argv[2] || '';
var parser = require('/usr/local/lib/node_modules/xml2json'); // xml2json should be installed in /usr/local/lib/node_modules
var fpath = require("fs");

// Options handed unchanged to parser.toJson below.
var options = {
  object: false,
  reversible: true,
  coerce: true,
  sanitize: true,
  trim: true
};

fpath.readFile(filename, function (err, data) {
  // Without this check a missing or unreadable file would pass `undefined`
  // to the parser and fail with an unrelated error.
  if (err) {
    console.error('xml2json: cannot read "' + filename + '": ' + err.message);
    process.exit(1);
  }
  var json = parser.toJson(data, options);
  console.log(json);
});
|
data/ebook_tools.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{ebook_tools}
|
5
|
-
s.version = '0.0.
|
5
|
+
s.version = '0.0.5'
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Aaron"]
|
@@ -16,11 +16,14 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.rdoc_options = ["--charset=UTF-8"]
|
17
17
|
s.executables << "ebook_tools"
|
18
18
|
s.executables << "para_import_mongo"
|
19
|
+
s.executables << "doc_book_import_mongo"
|
19
20
|
s.files = [
|
20
21
|
"README",
|
21
22
|
"CHANGELOG",
|
22
23
|
"bin/ebook_tools",
|
23
24
|
"bin/para_import_mongo",
|
25
|
+
"bin/doc_book_import_mongo",
|
26
|
+
"bin/xml2json",
|
24
27
|
"lib/ebook_tools.rb",
|
25
28
|
"lib/extract_book_struct.rb",
|
26
29
|
"lib/header_detect.rb",
|
@@ -29,6 +32,7 @@ Gem::Specification.new do |s|
|
|
29
32
|
"lib/epub.rb",
|
30
33
|
"lib/utils.rb",
|
31
34
|
"lib/paras_in_mongo.rb",
|
35
|
+
"lib/doc_book_in_mongo.rb",
|
32
36
|
"ebook_tools.gemspec"
|
33
37
|
]
|
34
38
|
s.add_dependency(%q<uuid>)
|
@@ -38,5 +42,5 @@ Gem::Specification.new do |s|
|
|
38
42
|
s.add_dependency(%q<pdf-reader>)
|
39
43
|
s.add_dependency(%q<nokogiri>)
|
40
44
|
s.add_dependency(%q<levenshtein>)
|
41
|
-
s.add_dependency(%q<
|
45
|
+
s.add_dependency(%q<moped>)
|
42
46
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'moped'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
# Imports a DocBook XML file into MongoDB as one document, using the
# bin/xml2json script shipped next to this library for the conversion.
module DocBookInMongo
  extend self

  # Converts +filename+ to JSON via bin/xml2json, takes the value stored
  # under the top-level "book" key, copies its "id" to "_id" and inserts it
  # into the configured collection.
  #
  # filename - path of the XML file handed to the converter.
  # options  - Hash; the keys read here are :host, :port, :database and
  #            :collection.
  #
  # Raises RuntimeError when the converter exits unsuccessfully,
  # ArgumentError when the converted JSON has no "book" entry, and whatever
  # JSON.parse or the Moped calls raise.
  def file_in_mongo(filename,options={})
    session = Moped::Session.new([ "#{options[:host]}:#{options[:port]}" ])
    session.use options[:database]

    xml2json = File.join(File.dirname(__FILE__),'..','bin','xml2json')
    # Pass the command as an argument array so no shell is involved: paths
    # containing spaces or shell metacharacters reach the converter intact
    # instead of being split or interpreted.
    output = IO.popen([xml2json, filename.to_s]) { |io| io.read }
    # The block form of IO.popen sets $? once the child has been reaped.
    raise "xml2json failed for #{filename}" unless $?.success?

    doc = JSON.parse(output)['book']
    raise ArgumentError, "#{filename}: converted JSON has no 'book' entry" if doc.nil?

    doc['_id'] = doc['id']
    session[options[:collection]].insert(doc)
  ensure
    # A new session is opened per call; release its connections so importing
    # a whole directory does not accumulate open sockets.
    session.disconnect if session
  end
end
|
data/lib/paras_in_mongo.rb
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require '
|
2
|
+
require 'moped'
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
|
-
include Mongo
|
6
5
|
module ParasInMongo
|
7
6
|
extend self
|
8
7
|
|
9
8
|
def file_in_mongo(filename,options={})
|
10
|
-
|
11
|
-
|
12
|
-
coll = db[options[:collection]]
|
9
|
+
session = Moped::Session.new([ "#{options[:host]}:#{options[:port]}" ])
|
10
|
+
session.use options[:database]
|
13
11
|
|
14
12
|
doc = Nokogiri::XML(File.open(filename).read)
|
15
13
|
|
@@ -35,8 +33,7 @@ module ParasInMongo
|
|
35
33
|
para_attrs = para_attrs.merge(keywords: keywords, content: content)
|
36
34
|
section = para.parent.search("info title").text
|
37
35
|
para_attrs = para_attrs.merge(source: source.merge(location: {section: section}))
|
38
|
-
|
39
|
-
coll.insert(para_attrs)
|
36
|
+
session[options[:collection]].insert(para_attrs)
|
40
37
|
end
|
41
38
|
end
|
42
39
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebook_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -124,7 +124,7 @@ dependencies:
|
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: '0'
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
|
-
name:
|
127
|
+
name: moped
|
128
128
|
requirement: !ruby/object:Gem::Requirement
|
129
129
|
none: false
|
130
130
|
requirements:
|
@@ -144,6 +144,7 @@ email: aaron@nonobo.com
|
|
144
144
|
executables:
|
145
145
|
- ebook_tools
|
146
146
|
- para_import_mongo
|
147
|
+
- doc_book_import_mongo
|
147
148
|
extensions: []
|
148
149
|
extra_rdoc_files: []
|
149
150
|
files:
|
@@ -151,6 +152,8 @@ files:
|
|
151
152
|
- CHANGELOG
|
152
153
|
- bin/ebook_tools
|
153
154
|
- bin/para_import_mongo
|
155
|
+
- bin/doc_book_import_mongo
|
156
|
+
- bin/xml2json
|
154
157
|
- lib/ebook_tools.rb
|
155
158
|
- lib/extract_book_struct.rb
|
156
159
|
- lib/header_detect.rb
|
@@ -159,6 +162,7 @@ files:
|
|
159
162
|
- lib/epub.rb
|
160
163
|
- lib/utils.rb
|
161
164
|
- lib/paras_in_mongo.rb
|
165
|
+
- lib/doc_book_in_mongo.rb
|
162
166
|
- ebook_tools.gemspec
|
163
167
|
homepage:
|
164
168
|
licenses: []
|