ebook_tools 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +6 -0
- data/ebook_tools.gemspec +3 -3
- data/lib/paras_in_mongo.rb +35 -21
- data/lib/utils.rb +10 -0
- metadata +3 -3
data/CHANGELOG
CHANGED
data/ebook_tools.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{ebook_tools}
|
5
|
-
s.version = '0.1.5'
|
5
|
+
s.version = '0.1.6'
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Aaron"]
|
9
|
-
s.date = %q{2013-06-
|
9
|
+
s.date = %q{2013-06-07}
|
10
10
|
s.description = %q{电子书工具集.}
|
11
11
|
s.email = %q{yalong1976@gmail.com}
|
12
12
|
s.require_paths = ["lib"]
|
@@ -20,9 +20,9 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.files = [
|
21
21
|
"README",
|
22
22
|
"CHANGELOG",
|
23
|
+
"bin/doc_book_import_mongo",
|
23
24
|
"bin/ebook_tools",
|
24
25
|
"bin/para_import_mongo",
|
25
|
-
"bin/doc_book_import_mongo",
|
26
26
|
"bin/para_import_scheduling",
|
27
27
|
"bin/xml2json",
|
28
28
|
"lib/doc_book_in_mongo.rb",
|
data/lib/paras_in_mongo.rb
CHANGED
@@ -11,32 +11,46 @@ module ParasInMongo
|
|
11
11
|
options = options.stringify_keys
|
12
12
|
session = Moped::Session.new([ "#{options['host']}:#{options['port']}" ])
|
13
13
|
session.use options['database']
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
book_id = doc.search("book")[0]['id']
|
18
|
-
title = doc.search("book info title")[0].text
|
19
|
-
author = doc.search("book info author")[0].text
|
20
|
-
pubdate = doc.search("book info pubdate")[0].text
|
21
|
-
publisher = doc.search("book info publisher")[0].text
|
22
|
-
|
14
|
+
doc =Nokogiri::XML(File.open(filename).read)
|
15
|
+
book = extract_book_info(doc)
|
16
|
+
source ={book: book}
|
23
17
|
paras = doc.search("para[key=yes]")
|
18
|
+
section = nil
|
19
|
+
section_title = ''
|
20
|
+
para_mongo_attrs = paras.map do |para|
|
21
|
+
unless para.parent == section
|
22
|
+
section_title = para.parent.search("info[1]/title").text
|
23
|
+
section = para.parent
|
24
|
+
end
|
25
|
+
extract_para_attrs(para,source.merge(location: {section: section_title}))
|
26
|
+
end
|
27
|
+
para_mongo_attrs.each do |attrs|
|
28
|
+
session[options['collection']].insert(attrs)
|
29
|
+
end
|
30
|
+
end
|
24
31
|
|
25
|
-
|
26
|
-
|
27
|
-
|
32
|
+
private
|
33
|
+
def extract_book_info(doc)
|
34
|
+
book_id = doc.search("book")[0]['id']
|
35
|
+
book_info = doc.search("book/info[1]")
|
36
|
+
title = book_info.search("title[1]").text
|
37
|
+
author = book_info.search("authorgroup/author/personname[1]").text
|
38
|
+
pubdate = book_info.search("pubdate[1]").text
|
39
|
+
publisher = book_info.search("publisher/publishername[1]").text
|
40
|
+
{title: title,book_id: book_id, author: author,pubdate: pubdate, publisher: publisher}
|
41
|
+
end
|
42
|
+
|
43
|
+
def extract_para_attrs(para,source)
|
28
44
|
para_attrs = {'_id' => para['id']}
|
29
|
-
content = para.search("content")
|
45
|
+
content = para.search("content[1]").text
|
30
46
|
|
31
47
|
keywords = []
|
32
|
-
|
33
|
-
|
34
|
-
|
48
|
+
para.search("keyword").each do |keyword|
|
49
|
+
keywords << {keyword: keyword.text, weight: keyword['weight'].to_i}
|
50
|
+
end
|
35
51
|
|
36
52
|
para_attrs = para_attrs.merge(keywords: keywords, content: content)
|
37
|
-
section = para.parent.search("info
|
38
|
-
para_attrs
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|
53
|
+
#section = timer{para.parent.search("info[1]/title").text}
|
54
|
+
para_attrs.merge(source: source)
|
55
|
+
end
|
42
56
|
end
|
data/lib/utils.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebook_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: uuid
|
@@ -166,9 +166,9 @@ extra_rdoc_files: []
|
|
166
166
|
files:
|
167
167
|
- README
|
168
168
|
- CHANGELOG
|
169
|
+
- bin/doc_book_import_mongo
|
169
170
|
- bin/ebook_tools
|
170
171
|
- bin/para_import_mongo
|
171
|
-
- bin/doc_book_import_mongo
|
172
172
|
- bin/para_import_scheduling
|
173
173
|
- bin/xml2json
|
174
174
|
- lib/doc_book_in_mongo.rb
|