ebook_tools 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 0.1.6 2013.6.7
2
+ 重构para_import_mongo,提升速度
3
+
4
+ 0.1.5 2013.6.4
5
+ 新增对epub的支持以及无目录文本文件的支持
6
+
1
7
  0.1.4 2013.5.28
2
8
  fix bug: 修复被遗漏修改的代码
3
9
 
data/ebook_tools.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{ebook_tools}
5
- s.version = '0.1.5'
5
+ s.version = '0.1.6'
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Aaron"]
9
- s.date = %q{2013-06-04}
9
+ s.date = %q{2013-06-07}
10
10
  s.description = %q{电子书工具集.}
11
11
  s.email = %q{yalong1976@gmail.com}
12
12
  s.require_paths = ["lib"]
@@ -20,9 +20,9 @@ Gem::Specification.new do |s|
20
20
  s.files = [
21
21
  "README",
22
22
  "CHANGELOG",
23
+ "bin/doc_book_import_mongo",
23
24
  "bin/ebook_tools",
24
25
  "bin/para_import_mongo",
25
- "bin/doc_book_import_mongo",
26
26
  "bin/para_import_scheduling",
27
27
  "bin/xml2json",
28
28
  "lib/doc_book_in_mongo.rb",
@@ -11,32 +11,46 @@ module ParasInMongo
11
11
  options = options.stringify_keys
12
12
  session = Moped::Session.new([ "#{options['host']}:#{options['port']}" ])
13
13
  session.use options['database']
14
-
15
- doc = Nokogiri::XML(File.open(filename).read)
16
-
17
- book_id = doc.search("book")[0]['id']
18
- title = doc.search("book info title")[0].text
19
- author = doc.search("book info author")[0].text
20
- pubdate = doc.search("book info pubdate")[0].text
21
- publisher = doc.search("book info publisher")[0].text
22
-
14
+ doc =Nokogiri::XML(File.open(filename).read)
15
+ book = extract_book_info(doc)
16
+ source ={book: book}
23
17
  paras = doc.search("para[key=yes]")
18
+ section = nil
19
+ section_title = ''
20
+ para_mongo_attrs = paras.map do |para|
21
+ unless para.parent == section
22
+ section_title = para.parent.search("info[1]/title").text
23
+ section = para.parent
24
+ end
25
+ extract_para_attrs(para,source.merge(location: {section: section_title}))
26
+ end
27
+ para_mongo_attrs.each do |attrs|
28
+ session[options['collection']].insert(attrs)
29
+ end
30
+ end
24
31
 
25
- source ={book: {title: title,book_id: book_id, author: author,pubdate: pubdate, publisher: publisher}}
26
-
27
- paras.each do |para|
32
+ private
33
# Collects the book-level metadata from a parsed book XML document.
#
# @param doc [#search] parsed XML document (a Nokogiri document at the call site)
# @return [Hash] with the keys :title, :book_id, :author, :pubdate and :publisher
# @raise [ArgumentError] when the document contains no <book> element
def extract_book_info(doc)
  book = doc.search("book").first
  # Fail with a descriptive message rather than a NoMethodError on nil.
  raise ArgumentError, "document has no <book> element" if book.nil?

  book_info = doc.search("book/info[1]")
  {
    title: book_info.search("title[1]").text,
    book_id: book['id'],
    author: book_info.search("authorgroup/author/personname[1]").text,
    pubdate: book_info.search("pubdate[1]").text,
    publisher: book_info.search("publisher/publishername[1]").text
  }
end
42
+
43
# Builds the attribute hash that is stored for a single paragraph node.
#
# @param para [#[], #search] paragraph node; its 'id' attribute becomes '_id'
# @param source [Hash] provenance data, stored unchanged under :source
# @return [Hash] with '_id' (String key), :keywords, :content and :source
def extract_para_attrs(para, source)
  # Weights go through to_i, so a nil or non-numeric value is stored as 0.
  keywords = para.search("keyword").map do |keyword|
    { keyword: keyword.text, weight: keyword['weight'].to_i }
  end

  {
    '_id' => para['id'],
    keywords: keywords,
    content: para.search("content[1]").text,
    source: source
  }
end
42
56
  end
data/lib/utils.rb CHANGED
@@ -271,4 +271,14 @@ module Utils
271
271
  return content
272
272
  end
273
273
 
274
+
275
# Runs the given block, prints how long it took, and returns the block's result.
#
# The elapsed time is written to standard output as "<seconds> seconds".
# A monotonic clock is used so that wall-clock adjustments made while the
# block runs cannot skew the reported duration or make it negative.
#
# @yield the work to be timed
# @return [Object] whatever the block returns
def timer
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  result = yield
  delta = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  puts "#{delta} seconds"
  result
end
283
+
274
284
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebook_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-04 00:00:00.000000000 Z
12
+ date: 2013-06-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: uuid
@@ -166,9 +166,9 @@ extra_rdoc_files: []
166
166
  files:
167
167
  - README
168
168
  - CHANGELOG
169
+ - bin/doc_book_import_mongo
169
170
  - bin/ebook_tools
170
171
  - bin/para_import_mongo
171
- - bin/doc_book_import_mongo
172
172
  - bin/para_import_scheduling
173
173
  - bin/xml2json
174
174
  - lib/doc_book_in_mongo.rb