ebook_tools 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +6 -0
- data/README +5 -1
- data/bin/doc_book_import_mongo +73 -0
- data/bin/xml2json +18 -0
- data/ebook_tools.gemspec +6 -2
- data/lib/doc_book_in_mongo.rb +21 -0
- data/lib/paras_in_mongo.rb +4 -7
- metadata +6 -2
data/CHANGELOG
CHANGED
data/README
CHANGED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
|
|
4
|
+
require 'rubygems'
|
|
5
|
+
require 'optparse'
|
|
6
|
+
require File.join(File.expand_path('../../',__FILE__),'lib','utils')
|
|
7
|
+
require File.join(File.expand_path('../../',__FILE__),'lib','doc_book_in_mongo')
|
|
8
|
+
|
|
9
|
+
# Prints the command-line usage text to standard output and then
# terminates the process via Kernel#exit (default, successful status).
def help
  usage_text = <<-EOF
usage:
doc_book_import_mongo [options] <docbook>

docbook: 已经标注重点段落的书文件或目录

options:
-H <host> , --host <host> : mongodb服务器,默认为localhost
-P <port> , --port <port> : mongodb服务器端口号,默认为27017 (Mongo默认端口号)
-D <database>, --database <database> : doc_book要存放的数据库
-C <collection>, --collection <collection> : doc_book存放的集合
  EOF
  puts usage_text
  exit
end
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Connection and destination defaults; each entry can be overridden by the
# command-line switches registered below.
options = {:host=>'localhost',:port=>27017,:database=>'resource_development',:collection=>'doc_books'}

# The parser is kept in its own local so the block parameter does not shadow it.
parser = OptionParser.new do |opts|
  opts.on('-H host','--host host') do |host|
    options[:host] = host
  end

  opts.on('-P port','--port port') do |port|
    options[:port] = port.to_i
  end

  opts.on('-D database','--database database') do |database|
    options[:database] = database
  end

  opts.on('-C collection','--collection collection') do |collection|
    options[:collection] = collection
  end

  opts.on('-h','--help') do
    help
  end
end

# OptionParser#parse returns the arguments that remain once the switches and
# their values have been consumed. Taking the docbook from that list (instead
# of ARGV[-1]) stops a trailing switch value such as the "host" in `-H host`
# from being mistaken for the docbook path.
docbook = parser.parse(ARGV).last

# No docbook given: show the usage text (help terminates the process).
help if docbook.nil?

# File.exist? replaces File.exists?, which is deprecated and absent from Ruby 3.2+.
unless File.exist?(docbook)
  raise "错误:指定的docbook不存在。"
end

# A directory is expanded through Utils.scan_file_from_dir with the '.xml'
# format filter; a single file is imported as-is.
files = if File.directory?(docbook)
  Utils.scan_file_from_dir(docbook,{:format=>'.xml'})
else
  [docbook]
end

# Import every file, remembering the ones that fail so the final status is honest.
failed_files = []
files.each do |file|
  begin
    DocBookInMongo.file_in_mongo(file,options)
  rescue => e
    failed_files << file
    puts "error: #{file} import mongo failure!"
    # Surface the reason on stderr instead of silently discarding it.
    warn "  #{e.class}: #{e.message}"
  end
end

# Only claim success when every file was imported; otherwise signal failure
# to the calling shell through a non-zero exit status.
if failed_files.empty?
  puts "success: #{docbook} in mongo successfully!"
else
  exit 1
end
|
data/bin/xml2json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#! /usr/local/bin/node
|
|
2
|
+
// Converts the XML file named by the first command-line argument to JSON
// and prints the result on standard output.
var filename = process.argv[2] || '';
var parser = require('/usr/local/lib/node_modules/xml2json'); // xml2json must be installed in /usr/local/lib/node_modules
var fs = require("fs");

// Settings passed straight through to xml2json's toJson().
var options = {
  object: false,
  reversible: true,
  coerce: true,
  sanitize: true,
  trim: true
};

// Without a filename there is nothing to convert; fail loudly rather than
// attempting to read the empty path.
if (!filename) {
  console.error('usage: xml2json <file.xml>');
  process.exit(1);
}

fs.readFile(filename, function (err, data) {
  // A read failure leaves `data` undefined; report it and exit non-zero so
  // callers capturing stdout can detect the failure.
  if (err) {
    console.error('xml2json: cannot read ' + filename + ': ' + err.message);
    process.exit(1);
  }
  var json = parser.toJson(data, options);
  console.log(json);
});
|
data/ebook_tools.gemspec
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = %q{ebook_tools}
|
|
5
|
-
s.version = '0.0.
|
|
5
|
+
s.version = '0.0.5'
|
|
6
6
|
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
8
8
|
s.authors = ["Aaron"]
|
|
@@ -16,11 +16,14 @@ Gem::Specification.new do |s|
|
|
|
16
16
|
s.rdoc_options = ["--charset=UTF-8"]
|
|
17
17
|
s.executables << "ebook_tools"
|
|
18
18
|
s.executables << "para_import_mongo"
|
|
19
|
+
s.executables << "doc_book_import_mongo"
|
|
19
20
|
s.files = [
|
|
20
21
|
"README",
|
|
21
22
|
"CHANGELOG",
|
|
22
23
|
"bin/ebook_tools",
|
|
23
24
|
"bin/para_import_mongo",
|
|
25
|
+
"bin/doc_book_import_mongo",
|
|
26
|
+
"bin/xml2json",
|
|
24
27
|
"lib/ebook_tools.rb",
|
|
25
28
|
"lib/extract_book_struct.rb",
|
|
26
29
|
"lib/header_detect.rb",
|
|
@@ -29,6 +32,7 @@ Gem::Specification.new do |s|
|
|
|
29
32
|
"lib/epub.rb",
|
|
30
33
|
"lib/utils.rb",
|
|
31
34
|
"lib/paras_in_mongo.rb",
|
|
35
|
+
"lib/doc_book_in_mongo.rb",
|
|
32
36
|
"ebook_tools.gemspec"
|
|
33
37
|
]
|
|
34
38
|
s.add_dependency(%q<uuid>)
|
|
@@ -38,5 +42,5 @@ Gem::Specification.new do |s|
|
|
|
38
42
|
s.add_dependency(%q<pdf-reader>)
|
|
39
43
|
s.add_dependency(%q<nokogiri>)
|
|
40
44
|
s.add_dependency(%q<levenshtein>)
|
|
41
|
-
s.add_dependency(%q<
|
|
45
|
+
s.add_dependency(%q<moped>)
|
|
42
46
|
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
require 'moped'
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require 'json'
|
|
5
|
+
|
|
6
|
+
# Imports DocBook XML files into MongoDB as single documents, using the
# bundled bin/xml2json converter to turn the XML into JSON first.
module DocBookInMongo
  extend self

  # Converts +filename+ to a document and inserts it into MongoDB.
  #
  # filename - path of the DocBook XML file to import.
  # options  - Hash read for :host, :port, :database and :collection.
  #
  # Returns whatever the collection's insert call returns.
  # Raises RuntimeError when the converter fails or its output has no
  # top-level "book" entry; JSON::ParserError when the output is not JSON.
  def file_in_mongo(filename,options={})
    # Convert first so a bad input file never touches the database.
    doc = book_document(filename)

    session = Moped::Session.new([ "#{options[:host]}:#{options[:port]}" ])
    begin
      session.use options[:database]
      session[options[:collection]].insert(doc)
    ensure
      # One session is created per imported file; release it so a directory
      # import does not accumulate open connections.
      session.disconnect
    end
  end

  private

  # Runs bin/xml2json on +filename+ and returns the parsed "book" hash with
  # its "_id" set from the book's own "id" value.
  def book_document(filename)
    xml2json = File.join(File.dirname(__FILE__),'..','bin','xml2json')

    # The array form of IO.popen executes the program directly, without a
    # shell, so spaces or shell metacharacters in the filename are passed
    # through as a single literal argument.
    output = IO.popen([xml2json, filename.to_s]) { |io| io.read }
    unless $?.success?
      raise "xml2json failed for #{filename} (exit status #{$?.exitstatus})"
    end

    doc = JSON.parse(output)['book']
    raise "no book element found in #{filename}" if doc.nil?

    doc['_id'] = doc['id']
    doc
  end
end
|
data/lib/paras_in_mongo.rb
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
|
-
require '
|
|
2
|
+
require 'moped'
|
|
3
3
|
require 'nokogiri'
|
|
4
4
|
|
|
5
|
-
include Mongo
|
|
6
5
|
module ParasInMongo
|
|
7
6
|
extend self
|
|
8
7
|
|
|
9
8
|
def file_in_mongo(filename,options={})
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
coll = db[options[:collection]]
|
|
9
|
+
session = Moped::Session.new([ "#{options[:host]}:#{options[:port]}" ])
|
|
10
|
+
session.use options[:database]
|
|
13
11
|
|
|
14
12
|
doc = Nokogiri::XML(File.open(filename).read)
|
|
15
13
|
|
|
@@ -35,8 +33,7 @@ module ParasInMongo
|
|
|
35
33
|
para_attrs = para_attrs.merge(keywords: keywords, content: content)
|
|
36
34
|
section = para.parent.search("info title").text
|
|
37
35
|
para_attrs = para_attrs.merge(source: source.merge(location: {section: section}))
|
|
38
|
-
|
|
39
|
-
coll.insert(para_attrs)
|
|
36
|
+
session[options[:collection]].insert(para_attrs)
|
|
40
37
|
end
|
|
41
38
|
end
|
|
42
39
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ebook_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.5
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -124,7 +124,7 @@ dependencies:
|
|
|
124
124
|
- !ruby/object:Gem::Version
|
|
125
125
|
version: '0'
|
|
126
126
|
- !ruby/object:Gem::Dependency
|
|
127
|
-
name:
|
|
127
|
+
name: moped
|
|
128
128
|
requirement: !ruby/object:Gem::Requirement
|
|
129
129
|
none: false
|
|
130
130
|
requirements:
|
|
@@ -144,6 +144,7 @@ email: aaron@nonobo.com
|
|
|
144
144
|
executables:
|
|
145
145
|
- ebook_tools
|
|
146
146
|
- para_import_mongo
|
|
147
|
+
- doc_book_import_mongo
|
|
147
148
|
extensions: []
|
|
148
149
|
extra_rdoc_files: []
|
|
149
150
|
files:
|
|
@@ -151,6 +152,8 @@ files:
|
|
|
151
152
|
- CHANGELOG
|
|
152
153
|
- bin/ebook_tools
|
|
153
154
|
- bin/para_import_mongo
|
|
155
|
+
- bin/doc_book_import_mongo
|
|
156
|
+
- bin/xml2json
|
|
154
157
|
- lib/ebook_tools.rb
|
|
155
158
|
- lib/extract_book_struct.rb
|
|
156
159
|
- lib/header_detect.rb
|
|
@@ -159,6 +162,7 @@ files:
|
|
|
159
162
|
- lib/epub.rb
|
|
160
163
|
- lib/utils.rb
|
|
161
164
|
- lib/paras_in_mongo.rb
|
|
165
|
+
- lib/doc_book_in_mongo.rb
|
|
162
166
|
- ebook_tools.gemspec
|
|
163
167
|
homepage:
|
|
164
168
|
licenses: []
|