ebook_tools 0.0.4 → 0.0.5

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 0.0.5 2013.4.7
2
+ add doc book import mongo tools
3
+
4
+ 0.0.4 2013.4.5
5
+ add para import mongo tools
6
+
1
7
  0.0.3 2013.4.2
2
8
  add book id for docbook
3
9
 
data/README CHANGED
@@ -3,4 +3,8 @@
3
3
 
4
4
  == Installation
5
5
 
6
- gem install ebook_tools
6
+ gem install ebook_tools
7
+
8
+ == requirements
9
+ * calibre command line interface
10
+ * nodejs
data/bin/doc_book_import_mongo ADDED
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+
4
+ require 'rubygems'
5
+ require 'optparse'
6
+ require File.join(File.expand_path('../../',__FILE__),'lib','utils')
7
+ require File.join(File.expand_path('../../',__FILE__),'lib','doc_book_in_mongo')
8
+
9
+ def help
10
+ puts <<-EOF
11
+ usage:
12
+ doc_book_import_mongo [options] <docbook>
13
+
14
+ docbook: 已经标注重点段落的书文件或目录
15
+
16
+ options:
17
+ -H <host> , --host <host> : mongodb服务器,默认为localhost
18
+ -P <port> , --port <port> : mongodb服务器端口号,默认为27017 (Mongo默认端口号)
19
+ -D <database>, --database <database> : doc_book要存放的数据库
20
+ -C <collection>, --collection <collection> : doc_book存放的集合
21
+ EOF
22
+ exit
23
+ end
24
+
25
+
26
+ options = {:host=>'localhost',:port=>27017,:database=>'resource_development',:collection=>'doc_books'}
27
+ opts = OptionParser.new do |opts|
28
+ opts.on('-H host','--host host') do |host|
29
+ options[:host] = host
30
+ end
31
+
32
+ opts.on('-P port','--port port') do |port|
33
+ options[:port] = port.to_i
34
+ end
35
+
36
+ opts.on('-D database','--database database') do |database|
37
+ options[:database] = database
38
+ end
39
+
40
+ opts.on('-C collection','--collection collection') do |collection|
41
+ options[:collection] = collection
42
+ end
43
+
44
+ opts.on('-h','--help') do
45
+ help
46
+ end
47
+ end
48
+ opts.parse ARGV
49
+
50
+ docbook = ARGV[-1]
51
+
52
+ if docbook.nil?
53
+ help
54
+ end
55
+
56
+ unless File.exists?(docbook)
57
+ raise "错误:指定的docbook不存在。"
58
+ end
59
+
60
+ files = if File.directory?(docbook)
61
+ Utils.scan_file_from_dir(docbook,{:format=>'.xml'})
62
+ else
63
+ [docbook]
64
+ end
65
+
66
+ files.each do |file|
67
+ begin
68
+ DocBookInMongo.file_in_mongo(file,options)
69
+ rescue
70
+ puts "error: #{file} import mongo failure!"
71
+ end
72
+ end
73
+ puts "success: #{docbook} in mongo successfully!"
data/bin/xml2json ADDED
@@ -0,0 +1,18 @@
1
+ #! /usr/local/bin/node
2
+ var filename = process.argv[2] || '';
3
+ var parser = require('/usr/local/lib/node_modules/xml2json'); //xml2json should be install in /usr/local/lib/node_modules
4
+ var fpath = require("fs");
5
+
6
+ var options = {
7
+ object: false,
8
+ reversible: true,
9
+ coerce: true,
10
+ sanitize: true,
11
+ trim: true
12
+ };
13
+
14
+ fpath.readFile(filename,function(err,data){
15
+ var xml = data;
16
+ var json = parser.toJson(xml,options);
17
+ console.log(json)
18
+ })
data/ebook_tools.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{ebook_tools}
5
- s.version = '0.0.4'
5
+ s.version = '0.0.5'
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Aaron"]
@@ -16,11 +16,14 @@ Gem::Specification.new do |s|
16
16
  s.rdoc_options = ["--charset=UTF-8"]
17
17
  s.executables << "ebook_tools"
18
18
  s.executables << "para_import_mongo"
19
+ s.executables << "doc_book_import_mongo"
19
20
  s.files = [
20
21
  "README",
21
22
  "CHANGELOG",
22
23
  "bin/ebook_tools",
23
24
  "bin/para_import_mongo",
25
+ "bin/doc_book_import_mongo",
26
+ "bin/xml2json",
24
27
  "lib/ebook_tools.rb",
25
28
  "lib/extract_book_struct.rb",
26
29
  "lib/header_detect.rb",
@@ -29,6 +32,7 @@ Gem::Specification.new do |s|
29
32
  "lib/epub.rb",
30
33
  "lib/utils.rb",
31
34
  "lib/paras_in_mongo.rb",
35
+ "lib/doc_book_in_mongo.rb",
32
36
  "ebook_tools.gemspec"
33
37
  ]
34
38
  s.add_dependency(%q<uuid>)
@@ -38,5 +42,5 @@ Gem::Specification.new do |s|
38
42
  s.add_dependency(%q<pdf-reader>)
39
43
  s.add_dependency(%q<nokogiri>)
40
44
  s.add_dependency(%q<levenshtein>)
41
- s.add_dependency(%q<mongo>)
45
+ s.add_dependency(%q<moped>)
42
46
  end
data/lib/doc_book_in_mongo.rb ADDED
@@ -0,0 +1,21 @@
1
+ # encoding: UTF-8
2
+ require 'moped'
3
+ require 'nokogiri'
4
+ require 'json'
5
+
6
+ module DocBookInMongo
7
+ extend self
8
+
9
+ def file_in_mongo(filename,options={})
10
+ session = Moped::Session.new([ "#{options[:host]}:#{options[:port]}" ])
11
+ session.use options[:database]
12
+
13
+ xml2json = File.join(File.dirname(__FILE__),'..','bin','xml2json')
14
+ cmd = %Q(#{xml2json} #{filename})
15
+ output = `#{cmd}`
16
+ json = JSON.parse(output)
17
+ doc = json['book']
18
+ doc['_id'] = doc['id']
19
+ session[options[:collection]].insert(doc)
20
+ end
21
+ end
data/lib/paras_in_mongo.rb CHANGED
@@ -1,15 +1,13 @@
1
1
  # encoding: UTF-8
2
- require 'mongo'
2
+ require 'moped'
3
3
  require 'nokogiri'
4
4
 
5
- include Mongo
6
5
  module ParasInMongo
7
6
  extend self
8
7
 
9
8
  def file_in_mongo(filename,options={})
10
- client = MongoClient.new(options[:host], options[:port])
11
- db = client[options[:database]]
12
- coll = db[options[:collection]]
9
+ session = Moped::Session.new([ "#{options[:host]}:#{options[:port]}" ])
10
+ session.use options[:database]
13
11
 
14
12
  doc = Nokogiri::XML(File.open(filename).read)
15
13
 
@@ -35,8 +33,7 @@ module ParasInMongo
35
33
  para_attrs = para_attrs.merge(keywords: keywords, content: content)
36
34
  section = para.parent.search("info title").text
37
35
  para_attrs = para_attrs.merge(source: source.merge(location: {section: section}))
38
-
39
- coll.insert(para_attrs)
36
+ session[options[:collection]].insert(para_attrs)
40
37
  end
41
38
  end
42
39
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebook_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -124,7 +124,7 @@ dependencies:
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0'
126
126
  - !ruby/object:Gem::Dependency
127
- name: mongo
127
+ name: moped
128
128
  requirement: !ruby/object:Gem::Requirement
129
129
  none: false
130
130
  requirements:
@@ -144,6 +144,7 @@ email: aaron@nonobo.com
144
144
  executables:
145
145
  - ebook_tools
146
146
  - para_import_mongo
147
+ - doc_book_import_mongo
147
148
  extensions: []
148
149
  extra_rdoc_files: []
149
150
  files:
@@ -151,6 +152,8 @@ files:
151
152
  - CHANGELOG
152
153
  - bin/ebook_tools
153
154
  - bin/para_import_mongo
155
+ - bin/doc_book_import_mongo
156
+ - bin/xml2json
154
157
  - lib/ebook_tools.rb
155
158
  - lib/extract_book_struct.rb
156
159
  - lib/header_detect.rb
@@ -159,6 +162,7 @@ files:
159
162
  - lib/epub.rb
160
163
  - lib/utils.rb
161
164
  - lib/paras_in_mongo.rb
165
+ - lib/doc_book_in_mongo.rb
162
166
  - ebook_tools.gemspec
163
167
  homepage:
164
168
  licenses: []