ebook_tools 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 0.1.6 2013.6.7
2
+ 重构para_import_mongo,提升速度
3
+
4
+ 0.1.5 2013.6.4
5
+ 新增对epub的支持以及无目录文本文件的支持
6
+
1
7
  0.1.4 2013.5.28
2
8
  fix bug: 修复被遗漏修改的代码
3
9
 
data/ebook_tools.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{ebook_tools}
5
- s.version = '0.1.5'
5
+ s.version = '0.1.6'
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Aaron"]
9
- s.date = %q{2013-06-04}
9
+ s.date = %q{2013-06-07}
10
10
  s.description = %q{电子书工具集.}
11
11
  s.email = %q{yalong1976@gmail.com}
12
12
  s.require_paths = ["lib"]
@@ -20,9 +20,9 @@ Gem::Specification.new do |s|
20
20
  s.files = [
21
21
  "README",
22
22
  "CHANGELOG",
23
+ "bin/doc_book_import_mongo",
23
24
  "bin/ebook_tools",
24
25
  "bin/para_import_mongo",
25
- "bin/doc_book_import_mongo",
26
26
  "bin/para_import_scheduling",
27
27
  "bin/xml2json",
28
28
  "lib/doc_book_in_mongo.rb",
@@ -11,32 +11,46 @@ module ParasInMongo
11
11
  options = options.stringify_keys
12
12
  session = Moped::Session.new([ "#{options['host']}:#{options['port']}" ])
13
13
  session.use options['database']
14
-
15
- doc = Nokogiri::XML(File.open(filename).read)
16
-
17
- book_id = doc.search("book")[0]['id']
18
- title = doc.search("book info title")[0].text
19
- author = doc.search("book info author")[0].text
20
- pubdate = doc.search("book info pubdate")[0].text
21
- publisher = doc.search("book info publisher")[0].text
22
-
14
+ doc =Nokogiri::XML(File.open(filename).read)
15
+ book = extract_book_info(doc)
16
+ source ={book: book}
23
17
  paras = doc.search("para[key=yes]")
18
+ section = nil
19
+ section_title = ''
20
+ para_mongo_attrs = paras.map do |para|
21
+ unless para.parent == section
22
+ section_title = para.parent.search("info[1]/title").text
23
+ section = para.parent
24
+ end
25
+ extract_para_attrs(para,source.merge(location: {section: section_title}))
26
+ end
27
+ para_mongo_attrs.each do |attrs|
28
+ session[options['collection']].insert(attrs)
29
+ end
30
+ end
24
31
 
25
- source ={book: {title: title,book_id: book_id, author: author,pubdate: pubdate, publisher: publisher}}
26
-
27
- paras.each do |para|
32
+ private
33
+ def extract_book_info(doc)
34
+ book_id = doc.search("book")[0]['id']
35
+ book_info = doc.search("book/info[1]")
36
+ title = book_info.search("title[1]").text
37
+ author = book_info.search("authorgroup/author/personname[1]").text
38
+ pubdate = book_info.search("pubdate[1]").text
39
+ publisher = book_info.search("publisher/publishername[1]").text
40
+ {title: title,book_id: book_id, author: author,pubdate: pubdate, publisher: publisher}
41
+ end
42
+
43
+ def extract_para_attrs(para,source)
28
44
  para_attrs = {'_id' => para['id']}
29
- content = para.search("content")[0].text
45
+ content = para.search("content[1]").text
30
46
 
31
47
  keywords = []
32
- para.search("keyword").each do |keyword|
33
- keywords << {keyword: keyword.text, weight: keyword['weight'].to_i}
34
- end
48
+ para.search("keyword").each do |keyword|
49
+ keywords << {keyword: keyword.text, weight: keyword['weight'].to_i}
50
+ end
35
51
 
36
52
  para_attrs = para_attrs.merge(keywords: keywords, content: content)
37
- section = para.parent.search("info title").text
38
- para_attrs = para_attrs.merge(source: source.merge(location: {section: section}))
39
- session[options['collection']].insert(para_attrs)
40
- end
41
- end
53
+ #section = timer{para.parent.search("info[1]/title").text}
54
+ para_attrs.merge(source: source)
55
+ end
42
56
  end
data/lib/utils.rb CHANGED
@@ -271,4 +271,14 @@ module Utils
271
271
  return content
272
272
  end
273
273
 
274
+
275
+ def timer
276
+ time = Time.now
277
+ result = yield
278
+ end_time = Time.now
279
+ delta = end_time - time
280
+ puts "#{delta} seconds"
281
+ result
282
+ end
283
+
274
284
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebook_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-04 00:00:00.000000000 Z
12
+ date: 2013-06-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: uuid
@@ -166,9 +166,9 @@ extra_rdoc_files: []
166
166
  files:
167
167
  - README
168
168
  - CHANGELOG
169
+ - bin/doc_book_import_mongo
169
170
  - bin/ebook_tools
170
171
  - bin/para_import_mongo
171
- - bin/doc_book_import_mongo
172
172
  - bin/para_import_scheduling
173
173
  - bin/xml2json
174
174
  - lib/doc_book_in_mongo.rb