bookshark 1.0.0.beta.5 → 1.0.0.pre.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -8
- data/bookshark.gemspec +1 -1
- data/lib/bookshark.rb +19 -1
- data/lib/bookshark/extractors/base.rb +4 -0
- data/lib/bookshark/extractors/bibliographical_book_extractor.rb +172 -0
- data/lib/bookshark/extractors/book_extractor.rb +33 -5
- data/lib/bookshark/extractors/publisher_extractor.rb +1 -0
- data/lib/bookshark/version.rb +1 -1
- data/spec/bookshark_spec.rb +4 -0
- data/spec/test_data/book_103788.json +22 -6
- data/spec/test_data/eager_book_184923.json +22 -6
- data/spec/test_data/search_01.json +185 -55
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e2e0077beb0dadb4ef9df9fcce7b7d55d54e8a6
|
4
|
+
data.tar.gz: 712b2769781e8b1ea55ecb25a501d886a939e307
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2348fa9b757abe44be69c0d21e5c61bc41e87eaae80d11bafff115201f40793d2724d6fe5f518c3c3f26308696febd4c772f8a7e989f085949ea6ab584c7264
|
7
|
+
data.tar.gz: 221cdadd0bcaf5df156cb668465b6a60e7471a5a650ddcce703a602a646dac19dee008a4de088ed037695289d7fa983792d1a5397ea40cfe843a39bf50155bb0
|
data/README.md
CHANGED
@@ -13,7 +13,7 @@ The representation of bibliographic metadata in JSON is inspired by [BibJSON](ht
|
|
13
13
|
Add this line to your application's Gemfile:
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
gem 'bookshark', "~> 1.0.0.
|
16
|
+
gem 'bookshark', "~> 1.0.0.pre"
|
17
17
|
```
|
18
18
|
|
19
19
|
And then execute:
|
@@ -145,29 +145,47 @@ The expected result of a book extraction is something like this:
|
|
145
145
|
]
|
146
146
|
},
|
147
147
|
"publisher": {
|
148
|
-
"
|
148
|
+
"text": "Εκδοτικός Οίκος Α. Α. Λιβάνη",
|
149
149
|
"b_id": "271"
|
150
150
|
},
|
151
|
-
"publication_year": "2006",
|
152
|
-
"pages": "326",
|
153
151
|
"isbn": "960-14-1157-7",
|
154
152
|
"isbn_13": "978-960-14-1157-6",
|
155
|
-
"status": "Κυκλοφορεί",
|
156
|
-
"price": "16,31",
|
157
153
|
"award": [
|
154
|
+
|
158
155
|
],
|
159
156
|
"description": "Τι είναι πιο επικίνδυνο, ένα όπλο ή μια πισίνα; Τι κοινό έχουν οι δάσκαλοι με τους παλαιστές του σούμο;...",
|
160
157
|
"category": [
|
161
158
|
{
|
162
159
|
"ddc": "330",
|
163
|
-
"
|
160
|
+
"name": "Οικονομία",
|
164
161
|
"b_id": "142"
|
165
162
|
}
|
166
163
|
],
|
167
|
-
"b_id": "103788"
|
164
|
+
"b_id": "103788",
|
165
|
+
"publication": {
|
166
|
+
"year": "2006",
|
167
|
+
"version": "1",
|
168
|
+
"place": "Αθήνα"
|
169
|
+
},
|
170
|
+
"format": "Βιβλίο",
|
171
|
+
"original_language": "αγγλικά",
|
172
|
+
"original_title": "Freakonomics",
|
173
|
+
"price": "16,31",
|
174
|
+
"availability": "Κυκλοφορεί",
|
175
|
+
"last_update": "27/1/2006",
|
176
|
+
"series": {
|
177
|
+
"name": "Οικονομία",
|
178
|
+
"volume": null
|
179
|
+
},
|
180
|
+
"physical_description": {
|
181
|
+
"pages": "326",
|
182
|
+
"size": "21x14",
|
183
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
184
|
+
}
|
168
185
|
}
|
169
186
|
]
|
170
187
|
}
|
188
|
+
|
171
189
|
```
|
172
190
|
Here is a [Book Sample](https://gist.github.com/dklisiaris/a6f3d6f37806186f3c79) extracted with eager option enabled.
|
173
191
|
|
data/bookshark.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.require_paths = ["lib"]
|
22
22
|
|
23
23
|
spec.add_dependency "nokogiri", "~> 1.6", ">= 1.6.6"
|
24
|
-
spec.add_dependency "sanitize", "~>
|
24
|
+
spec.add_dependency "sanitize", "~> 4.0"
|
25
25
|
spec.add_dependency "json", "~> 1.8"
|
26
26
|
spec.add_dependency "htmlentities", "~> 4.3"
|
27
27
|
|
data/lib/bookshark.rb
CHANGED
@@ -4,6 +4,7 @@ require 'bookshark/storage/file_manager'
|
|
4
4
|
require 'bookshark/extractors/author_extractor'
|
5
5
|
require 'bookshark/extractors/category_extractor'
|
6
6
|
require 'bookshark/extractors/book_extractor'
|
7
|
+
require 'bookshark/extractors/bibliographical_book_extractor'
|
7
8
|
require 'bookshark/extractors/publisher_extractor'
|
8
9
|
require 'bookshark/extractors/search'
|
9
10
|
|
@@ -91,6 +92,23 @@ module Bookshark
|
|
91
92
|
return response
|
92
93
|
end
|
93
94
|
|
95
|
+
|
96
|
+
# def bibliographical_book(options = {})
|
97
|
+
# bibliographical_book_extractor = Biblionet::Extractors::BibliographicalBookExtractor.new
|
98
|
+
|
99
|
+
# uri = "http://www.biblionet.gr/main.asp?page=results&Titlesid=#{options[:id]}"
|
100
|
+
# options[:format] ||= @format
|
101
|
+
|
102
|
+
# book = bibliographical_book_extractor.load_and_extract_book(uri)
|
103
|
+
|
104
|
+
# response = {}
|
105
|
+
# response[:book] = !book.nil? ? [book] : []
|
106
|
+
# response = change_format(response, options[:format])
|
107
|
+
# response = bibliographical_book_extractor.decode_text(response)
|
108
|
+
# end
|
109
|
+
|
110
|
+
# puts Bookshark::Extractor.new(format: 'pretty_json').bibliographical_book(id: 103788)
|
111
|
+
|
94
112
|
def category(options = {})
|
95
113
|
uri = process_options(options, __method__)
|
96
114
|
options[:format] ||= @format
|
@@ -231,7 +249,7 @@ module Bookshark
|
|
231
249
|
local_path = "html_book_pages/#{((id-1)/1000)}/book_#{id}.html"
|
232
250
|
when 'category'
|
233
251
|
url_method = 'index'
|
234
|
-
local_path = "html_ddc_pages/#{((id-1)/1000)}/ddc_#{id}.html"
|
252
|
+
local_path = "html_ddc_pages/#{((id-1)/1000)}/ddc_#{id}.html"
|
235
253
|
else
|
236
254
|
puts "Called from unknown method. Probably its rspec."
|
237
255
|
end
|
@@ -141,6 +141,10 @@ module Biblionet
|
|
141
141
|
# +encoded_text+:: the text which contains encoded entities
|
142
142
|
#
|
143
143
|
def decode_text(encoded_text)
|
144
|
+
self.class.decode_text(encoded_text)
|
145
|
+
end
|
146
|
+
|
147
|
+
def self.decode_text(encoded_text)
|
144
148
|
# encoded_text = File.read(encoded_file_path)
|
145
149
|
coder = HTMLEntities.new
|
146
150
|
coder.decode(encoded_text)
|
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require_relative 'base'
|
5
|
+
|
6
|
+
|
7
|
+
module Biblionet
|
8
|
+
module Extractors
|
9
|
+
|
10
|
+
# Extracts the bibliographical details of a single book page into a flat
# hash. The hash produced by the most recent successful extraction is
# exposed through +bibliographical_book+.
class BibliographicalBookExtractor < Base
  # Keys copied (in this order) from BibliographicalBookDataExtractor#details.
  # Every key is always present in the resulting hash; a detail that was not
  # found on the page simply comes out as nil.
  DETAIL_KEYS = [
    :original_language, :original_title, :last_update, :cover_type,
    :availability, :price, :series, :physical_size, :format,
    :publisher, :publication
  ].freeze

  attr_reader :bibliographical_book

  # Forwards +uri+ to the parent initializer and, when a uri was given and
  # a page is available in @page afterwards, extracts the book right away.
  #
  # ==== Attributes
  #
  # +uri+:: optional location of the page to extract from
  #
  def initialize(uri=nil)
    super(uri)
    extract_bibliographical_book unless uri.nil? || @page.nil?
  end

  # Calls +load_page+ with +uri+ (defined outside this class, presumably in
  # Base) and then extracts the book from the page.
  #
  # Returns the extracted hash, or nil when no uri was given, no page is
  # available, or the page could not be parsed.
  #
  # ==== Attributes
  #
  # +uri+:: location of the page to load
  #
  def load_and_extract_book(uri=nil)
    load_page(uri)
    extract_bibliographical_book unless uri.nil? || @page.nil?
  end

  # Builds the bibliographical hash from +book_page+ and remembers it in
  # @bibliographical_book.
  #
  # Returns the hash, or nil (leaving @bibliographical_book untouched) when
  # the data extractor could not create a nodeset from the page.
  #
  # ==== Attributes
  #
  # +biblionet_id+:: currently unused; kept so existing callers keep working
  # +book_page+:: the raw html of the page, defaults to the loaded page
  #
  def extract_bibliographical_book(biblionet_id=@biblionet_id, book_page=@page)
    page = BibliographicalBookDataExtractor.new(book_page)

    # End extraction if the data extractor couldn't create a nodeset
    return nil if page.nodeset.nil?

    extracted_details = page.details

    @bibliographical_book = DETAIL_KEYS.each_with_object({}) do |key, book|
      book[key] = extracted_details[key]
    end
  end
end
|
55
|
+
|
56
|
+
# Parses the html of a bibliographical book page and pulls the individual
# details out of it, mostly with regular expressions.
class BibliographicalBookDataExtractor
  attr_reader :nodeset

  # Builds the nodeset from the part of +document+ enclosed by the
  # CONTENT START / CONTENT END markers. When the markers are missing the
  # nodeset is nil, which callers use as the "could not parse" signal.
  #
  # ==== Attributes
  #
  # +document+:: the raw html of the page
  #
  def initialize(document)
    # No need to operate on whole page. Just on part containing the book.
    content_re = /<!-- CONTENT START -->.*<!-- CONTENT END -->/m
    content_match = content_re.match(document)

    if content_match.nil?
      # NOTE(review): this dumps the whole unparsable document to STDOUT.
      # Kept because it is existing observable behaviour, but it looks like a
      # debugging leftover - confirm whether it should go to a logger instead.
      puts document
      # Something is wrong with the html, so there is nothing to parse.
      @nodeset = nil
    else
      @nodeset = Nokogiri::HTML(content_match[0])
    end
  end

  # NOTE(review): unfinished stub. It only returns the pattern for a physical
  # size such as "21x14"; the actual size extraction happens in #details.
  def size
    /\d+x\d+/
  end

  # NOTE(review): unfinished stub. It only returns the pattern for a series
  # volume number; the actual series extraction happens in #details.
  def series
    /(?<=· )\d+/
  end

  # Extracts every available detail from the nodeset.
  #
  # Returns a hash which always contains :series, :physical_size, :format,
  # :publisher and :publication. The keys :original_language,
  # :original_title, :isbn, :isbn_13, :last_update, :cover_type,
  # :availability and :price are only present when found on the page.
  #
  # Missing title or publisher links no longer raise: the format falls back
  # to 'Βιβλίο' and the publisher/publication values become nil.
  def details
    details_hash = {}

    # Details which are picked out of a line by a single pattern.
    detail_patterns = {
      isbn:         /(?<= )\d+-\d+-\d+-\d+(?= |,)/,
      isbn_13:      /\d+-\d+-\d+-\d+-\d+/,
      last_update:  /\d{1,2}\/\d{1,2}\/\d{2,4}/,
      cover_type:   /(?<=\()\p{Word}+( \p{Word}+)?(?=\))/,
      availability: /(?<=\[).+(?=\])/,
      price:        /(?<=€ )\d+,\d*/
    }

    @nodeset.xpath("//span[@class='small'][1]").inner_html.split('<br>').each do |detail|
      detail = BibliographicalBookExtractor.decode_text(detail)

      if detail.start_with? "Γλώσσα πρωτοτύπου:"
        details_hash[:original_language] = detail.gsub(/Γλώσσα πρωτοτύπου:/, "").strip
      elsif detail.start_with? "Τίτλος πρωτοτύπου:"
        details_hash[:original_title] = detail.gsub(/Τίτλος πρωτοτύπου:/, "").strip
      end

      # Only store what actually matched, so that a later line without the
      # detail can not wipe a value found on an earlier line.
      detail_patterns.each do |key, pattern|
        matched = detail[pattern]
        details_hash[key] = matched unless matched.nil?
      end
    end

    pre_details_text = @nodeset.xpath("//span[@class='small'][1]/preceding::text()").text
    pre_details_text = BibliographicalBookExtractor.decode_text(pre_details_text)

    details_hash[:series] = series_details(pre_details_text)
    details_hash[:physical_size] = pre_details_text[/\d+x\d+/]

    # The format, when given, follows the title link in square brackets.
    title_link = @nodeset.xpath("//a[@class='booklink' and @href[contains(.,'/book/') ]][1]").first
    book_format = text_after(title_link).strip[/(?<=\[).+(?=\])/]
    details_hash[:format] = book_format.nil? ? 'Βιβλίο' : book_format

    publisher_node = @nodeset.xpath("//a[@class='booklink' and @href[contains(.,'/com/') ]][1]").first
    publisher_hash = {}
    publisher_hash[:text] = publisher_node.nil? ? nil : publisher_node.text
    # The id is the path segment right after "/com/" in the publisher link.
    publisher_hash[:b_id] = publisher_node.nil? ? nil : publisher_node[:href].split("/")[2]

    after_publisher_text = BibliographicalBookExtractor.decode_text(text_after(publisher_node))

    publication_hash = {}
    publication_hash[:year] = after_publisher_text[/(?<=, )\d+(?=\.)/]
    publication_hash[:version] = pre_details_text[/(?<=- )\d+(?=η)/]
    publication_hash[:place] = pre_details_text[/(?<=- )\p{Word}+( \S)?( \p{Word}+( \S)?)*(?= :)/]

    details_hash[:publisher] = publisher_hash
    details_hash[:publication] = publication_hash

    details_hash
  end

  private

  # Picks the series out of +text+, where it appears in parentheses either
  # as "(Name · 12)" or as just "(Name)".
  #
  # Returns a hash with :name and :volume, or an empty hash when +text+
  # contains no series.
  def series_details(text)
    series_regex = /(?<=\()\p{Word}+( \S)?( \p{Word}+( \S)?)* · \d+(?=\))/
    series_regex_no_vol = /(?<=\()\p{Word}+( \S)?( \p{Word}+( \S)?)*(?=\))/
    series_name_regex = /\p{Word}+( \S)?( \p{Word}+( \S)?)*(?= ·)/
    series_volume_regex = /(?<=· )\d+/

    series_hash = {}
    if text =~ series_regex
      series_text = text[series_regex]
      name = series_text[series_name_regex]
      volume = series_text[series_volume_regex]
      series_hash[:name] = name unless name.nil?
      series_hash[:volume] = volume unless volume.nil?
    elsif text =~ series_regex_no_vol
      series_hash[:name] = text[series_regex_no_vol]
      series_hash[:volume] = nil
    end

    series_hash
  end

  # Text of the node which follows +node+, or an empty string when +node+ is
  # nil or has nothing after it.
  def text_after(node)
    following = node && node.next_sibling
    following.nil? ? '' : following.text
  end
end
|
170
|
+
|
171
|
+
end
|
172
|
+
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
4
|
require_relative 'base'
|
5
|
+
require_relative 'bibliographical_book_extractor'
|
5
6
|
require 'sanitize'
|
6
7
|
|
7
8
|
module Biblionet
|
@@ -169,12 +170,12 @@ module Biblionet
|
|
169
170
|
|
170
171
|
details_hash = proccess_details(details)
|
171
172
|
|
172
|
-
book_hash[:publication_year] = details_hash[:publication_year]
|
173
|
-
book_hash[:pages] = details_hash[:pages]
|
173
|
+
# book_hash[:publication_year] = details_hash[:publication_year]
|
174
|
+
# book_hash[:pages] = details_hash[:pages]
|
174
175
|
book_hash[:isbn] = details_hash[:isbn]
|
175
176
|
book_hash[:isbn_13] = details_hash[:isbn_13].nil? ? nil : details_hash[:isbn_13]
|
176
|
-
book_hash[:status] = details_hash[:status]
|
177
|
-
book_hash[:price] = details_hash[:price]
|
177
|
+
# book_hash[:status] = details_hash[:status]
|
178
|
+
# book_hash[:price] = details_hash[:price]
|
178
179
|
book_hash[:award] = page.awards
|
179
180
|
|
180
181
|
|
@@ -192,7 +193,34 @@ module Biblionet
|
|
192
193
|
|
193
194
|
|
194
195
|
book_hash[:category] = ddcs
|
195
|
-
book_hash[:b_id] = biblionet_id
|
196
|
+
book_hash[:b_id] = biblionet_id
|
197
|
+
|
198
|
+
uri = "http://www.biblionet.gr/main.asp?page=results&Titlesid=#{biblionet_id}"
|
199
|
+
|
200
|
+
bibliographical_book_extractor = Biblionet::Extractors::BibliographicalBookExtractor.new
|
201
|
+
bibliographical_details = bibliographical_book_extractor.load_and_extract_book(uri)
|
202
|
+
|
203
|
+
book_hash[:publisher] = bibliographical_details[:publisher]
|
204
|
+
book_hash[:publication] = bibliographical_details[:publication]
|
205
|
+
|
206
|
+
book_hash[:format] = bibliographical_details[:format]
|
207
|
+
|
208
|
+
book_hash[:original_language] = bibliographical_details[:original_language]
|
209
|
+
book_hash[:original_title] = bibliographical_details[:original_title]
|
210
|
+
|
211
|
+
book_hash[:price] = bibliographical_details[:price]
|
212
|
+
book_hash[:availability] = bibliographical_details[:availability]
|
213
|
+
book_hash[:last_update] = bibliographical_details[:last_update]
|
214
|
+
|
215
|
+
book_hash[:series] = bibliographical_details[:series]
|
216
|
+
|
217
|
+
physical_description_hash = {}
|
218
|
+
physical_description_hash[:pages] = details_hash[:pages]
|
219
|
+
physical_description_hash[:size] = bibliographical_details[:physical_size]
|
220
|
+
physical_description_hash[:cover_type] = bibliographical_details[:cover_type]
|
221
|
+
|
222
|
+
book_hash[:physical_description] = physical_description_hash
|
223
|
+
|
196
224
|
|
197
225
|
return @book = book_hash
|
198
226
|
end
|
@@ -99,6 +99,7 @@ module Biblionet
|
|
99
99
|
headquarters_hash = Hash[headquarters_hash.map {|k, v| [mappings[k], v] }]
|
100
100
|
headquarters_hash[:telephone] = [headquarters_hash[:telephone]] unless headquarters_hash[:telephone].kind_of?(Array) or headquarters_hash[:telephone].nil?
|
101
101
|
headquarters_hash[:website] = headquarters_hash[:website].split(',').map(&:strip) if (headquarters_hash[:website] and headquarters_hash[:website].include? ',')
|
102
|
+
headquarters_hash[:address] = [headquarters_hash[:address]] unless headquarters_hash[:address].kind_of?(Array)
|
102
103
|
|
103
104
|
return headquarters_hash
|
104
105
|
end
|
data/lib/bookshark/version.rb
CHANGED
data/spec/bookshark_spec.rb
CHANGED
@@ -23,15 +23,11 @@
|
|
23
23
|
]
|
24
24
|
},
|
25
25
|
"publisher": {
|
26
|
-
"
|
26
|
+
"text": "Εκδοτικός Οίκος Α. Α. Λιβάνη",
|
27
27
|
"b_id": "271"
|
28
28
|
},
|
29
|
-
"publication_year": "2006",
|
30
|
-
"pages": "326",
|
31
29
|
"isbn": "960-14-1157-7",
|
32
30
|
"isbn_13": "978-960-14-1157-6",
|
33
|
-
"status": "Κυκλοφορεί",
|
34
|
-
"price": "16,31",
|
35
31
|
"award": [
|
36
32
|
|
37
33
|
],
|
@@ -43,7 +39,27 @@
|
|
43
39
|
"b_id": "142"
|
44
40
|
}
|
45
41
|
],
|
46
|
-
"b_id": "103788"
|
42
|
+
"b_id": "103788",
|
43
|
+
"publication": {
|
44
|
+
"year": "2006",
|
45
|
+
"version": "1",
|
46
|
+
"place": "Αθήνα"
|
47
|
+
},
|
48
|
+
"format": "Βιβλίο",
|
49
|
+
"original_language": "αγγλικά",
|
50
|
+
"original_title": "Freakonomics",
|
51
|
+
"price": "16,31",
|
52
|
+
"availability": "Κυκλοφορεί",
|
53
|
+
"last_update": "27/1/2006",
|
54
|
+
"series": {
|
55
|
+
"name": "Οικονομία",
|
56
|
+
"volume": null
|
57
|
+
},
|
58
|
+
"physical_description": {
|
59
|
+
"pages": "326",
|
60
|
+
"size": "21x14",
|
61
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
62
|
+
}
|
47
63
|
}
|
48
64
|
]
|
49
65
|
}
|
@@ -71,12 +71,8 @@
|
|
71
71
|
"b_id": "112"
|
72
72
|
}
|
73
73
|
],
|
74
|
-
"publication_year": "2012",
|
75
|
-
"pages": "345",
|
76
74
|
"isbn": "978-960-524-394-4",
|
77
75
|
"isbn_13": null,
|
78
|
-
"status": "Κυκλοφορεί",
|
79
|
-
"price": "16,00",
|
80
76
|
"award": [
|
81
77
|
|
82
78
|
],
|
@@ -209,7 +205,27 @@
|
|
209
205
|
}
|
210
206
|
}
|
211
207
|
],
|
212
|
-
"b_id": "184923"
|
208
|
+
"b_id": "184923",
|
209
|
+
"publication": {
|
210
|
+
"year": "2012",
|
211
|
+
"version": "1",
|
212
|
+
"place": "Ηράκλειο Κρήτης"
|
213
|
+
},
|
214
|
+
"format": "Βιβλίο",
|
215
|
+
"original_language": null,
|
216
|
+
"original_title": null,
|
217
|
+
"price": "16,00",
|
218
|
+
"availability": "Κυκλοφορεί",
|
219
|
+
"last_update": "12/12/2012",
|
220
|
+
"series": {
|
221
|
+
"name": "Εκλαΐκευση της Επιστήμης",
|
222
|
+
"volume": null
|
223
|
+
},
|
224
|
+
"physical_description": {
|
225
|
+
"pages": "345",
|
226
|
+
"size": "24x17",
|
227
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
228
|
+
}
|
213
229
|
}
|
214
230
|
]
|
215
|
-
}
|
231
|
+
}
|
@@ -13,15 +13,11 @@
|
|
13
13
|
"contributors": {
|
14
14
|
},
|
15
15
|
"publisher": {
|
16
|
-
"
|
16
|
+
"text": "Λογοσοφία",
|
17
17
|
"b_id": "7628"
|
18
18
|
},
|
19
|
-
"publication_year": "2007",
|
20
|
-
"pages": "181",
|
21
19
|
"isbn": "978-960-89288-3-1",
|
22
20
|
"isbn_13": null,
|
23
|
-
"status": "Κυκλοφορεί",
|
24
|
-
"price": "13,52",
|
25
21
|
"award": [
|
26
22
|
|
27
23
|
],
|
@@ -33,7 +29,25 @@
|
|
33
29
|
"b_id": "3"
|
34
30
|
}
|
35
31
|
],
|
36
|
-
"b_id": "119000"
|
32
|
+
"b_id": "119000",
|
33
|
+
"publication": {
|
34
|
+
"year": "2007",
|
35
|
+
"version": "1",
|
36
|
+
"place": "Αθήνα"
|
37
|
+
},
|
38
|
+
"format": "Βιβλίο",
|
39
|
+
"original_language": null,
|
40
|
+
"original_title": null,
|
41
|
+
"price": "13,52",
|
42
|
+
"availability": "Κυκλοφορεί",
|
43
|
+
"last_update": "9/5/2007",
|
44
|
+
"series": {
|
45
|
+
},
|
46
|
+
"physical_description": {
|
47
|
+
"pages": "181",
|
48
|
+
"size": "21x14",
|
49
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
50
|
+
}
|
37
51
|
},
|
38
52
|
{
|
39
53
|
"title": "Σημεία και τέρατα της οικονομίας",
|
@@ -58,15 +72,11 @@
|
|
58
72
|
]
|
59
73
|
},
|
60
74
|
"publisher": {
|
61
|
-
"
|
75
|
+
"text": "Εκδοτικός Οίκος Α. Α. Λιβάνη",
|
62
76
|
"b_id": "271"
|
63
77
|
},
|
64
|
-
"publication_year": "2006",
|
65
|
-
"pages": "326",
|
66
78
|
"isbn": "960-14-1157-7",
|
67
79
|
"isbn_13": "978-960-14-1157-6",
|
68
|
-
"status": "Κυκλοφορεί",
|
69
|
-
"price": "16,31",
|
70
80
|
"award": [
|
71
81
|
|
72
82
|
],
|
@@ -78,7 +88,27 @@
|
|
78
88
|
"b_id": "142"
|
79
89
|
}
|
80
90
|
],
|
81
|
-
"b_id": "103788"
|
91
|
+
"b_id": "103788",
|
92
|
+
"publication": {
|
93
|
+
"year": "2006",
|
94
|
+
"version": "1",
|
95
|
+
"place": "Αθήνα"
|
96
|
+
},
|
97
|
+
"format": "Βιβλίο",
|
98
|
+
"original_language": "αγγλικά",
|
99
|
+
"original_title": "Freakonomics",
|
100
|
+
"price": "16,31",
|
101
|
+
"availability": "Κυκλοφορεί",
|
102
|
+
"last_update": "27/1/2006",
|
103
|
+
"series": {
|
104
|
+
"name": "Οικονομία",
|
105
|
+
"volume": null
|
106
|
+
},
|
107
|
+
"physical_description": {
|
108
|
+
"pages": "326",
|
109
|
+
"size": "21x14",
|
110
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
111
|
+
}
|
82
112
|
},
|
83
113
|
{
|
84
114
|
"title": "Και άλλα σημεία και τέρατα από την ιστορία",
|
@@ -103,15 +133,11 @@
|
|
103
133
|
]
|
104
134
|
},
|
105
135
|
"publisher": {
|
106
|
-
"
|
136
|
+
"text": "Modern Times",
|
107
137
|
"b_id": "191"
|
108
138
|
},
|
109
|
-
"publication_year": "2004",
|
110
|
-
"pages": "62",
|
111
139
|
"isbn": "960-397-927-9",
|
112
140
|
"isbn_13": "978-960-397-927-2",
|
113
|
-
"status": "Κυκλοφορεί",
|
114
|
-
"price": "15,14",
|
115
141
|
"award": [
|
116
142
|
|
117
143
|
],
|
@@ -123,7 +149,25 @@
|
|
123
149
|
"b_id": "2456"
|
124
150
|
}
|
125
151
|
],
|
126
|
-
"b_id": "87815"
|
152
|
+
"b_id": "87815",
|
153
|
+
"publication": {
|
154
|
+
"year": "2004",
|
155
|
+
"version": null,
|
156
|
+
"place": "Αθήνα"
|
157
|
+
},
|
158
|
+
"format": "Βιβλίο",
|
159
|
+
"original_language": "αγγλικά",
|
160
|
+
"original_title": "Even more horrible history",
|
161
|
+
"price": "15,14",
|
162
|
+
"availability": "Κυκλοφορεί",
|
163
|
+
"last_update": null,
|
164
|
+
"series": {
|
165
|
+
},
|
166
|
+
"physical_description": {
|
167
|
+
"pages": "62",
|
168
|
+
"size": "28x22",
|
169
|
+
"cover_type": "Σκληρό εξώφυλλο"
|
170
|
+
}
|
127
171
|
},
|
128
172
|
{
|
129
173
|
"title": "Σημεία και τέρατα από την ιστορία",
|
@@ -148,15 +192,11 @@
|
|
148
192
|
]
|
149
193
|
},
|
150
194
|
"publisher": {
|
151
|
-
"
|
195
|
+
"text": "Modern Times",
|
152
196
|
"b_id": "191"
|
153
197
|
},
|
154
|
-
"publication_year": "2004",
|
155
|
-
"pages": "78",
|
156
198
|
"isbn": "960-397-926-0",
|
157
199
|
"isbn_13": "978-960-397-926-5",
|
158
|
-
"status": "Κυκλοφορεί",
|
159
|
-
"price": "15,14",
|
160
200
|
"award": [
|
161
201
|
|
162
202
|
],
|
@@ -168,7 +208,25 @@
|
|
168
208
|
"b_id": "2456"
|
169
209
|
}
|
170
210
|
],
|
171
|
-
"b_id": "87812"
|
211
|
+
"b_id": "87812",
|
212
|
+
"publication": {
|
213
|
+
"year": "2004",
|
214
|
+
"version": null,
|
215
|
+
"place": "Αθήνα"
|
216
|
+
},
|
217
|
+
"format": "Βιβλίο",
|
218
|
+
"original_language": "αγγλικά",
|
219
|
+
"original_title": "Horrible history",
|
220
|
+
"price": "15,14",
|
221
|
+
"availability": "Κυκλοφορεί",
|
222
|
+
"last_update": null,
|
223
|
+
"series": {
|
224
|
+
},
|
225
|
+
"physical_description": {
|
226
|
+
"pages": "78",
|
227
|
+
"size": "28x22",
|
228
|
+
"cover_type": "Σκληρό εξώφυλλο"
|
229
|
+
}
|
172
230
|
},
|
173
231
|
{
|
174
232
|
"title": "Σημεία και τέρατα",
|
@@ -183,15 +241,11 @@
|
|
183
241
|
"contributors": {
|
184
242
|
},
|
185
243
|
"publisher": {
|
186
|
-
"
|
244
|
+
"text": "Κέδρος",
|
187
245
|
"b_id": "21"
|
188
246
|
},
|
189
|
-
"publication_year": "1994",
|
190
|
-
"pages": "126",
|
191
247
|
"isbn": "960-04-0941-2",
|
192
248
|
"isbn_13": "978-960-04-0941-3",
|
193
|
-
"status": "Κυκλοφορεί",
|
194
|
-
"price": "9,17",
|
195
249
|
"award": [
|
196
250
|
|
197
251
|
],
|
@@ -203,7 +257,25 @@
|
|
203
257
|
"b_id": "9"
|
204
258
|
}
|
205
259
|
],
|
206
|
-
"b_id": "15839"
|
260
|
+
"b_id": "15839",
|
261
|
+
"publication": {
|
262
|
+
"year": "1994",
|
263
|
+
"version": "2",
|
264
|
+
"place": "Αθήνα"
|
265
|
+
},
|
266
|
+
"format": "Βιβλίο",
|
267
|
+
"original_language": null,
|
268
|
+
"original_title": null,
|
269
|
+
"price": "9,17",
|
270
|
+
"availability": "Κυκλοφορεί",
|
271
|
+
"last_update": "28/7/2010",
|
272
|
+
"series": {
|
273
|
+
},
|
274
|
+
"physical_description": {
|
275
|
+
"pages": "126",
|
276
|
+
"size": "21x14",
|
277
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
278
|
+
}
|
207
279
|
},
|
208
280
|
{
|
209
281
|
"title": "Επίσημη αγραμματοσύνη",
|
@@ -218,15 +290,11 @@
|
|
218
290
|
"contributors": {
|
219
291
|
},
|
220
292
|
"publisher": {
|
221
|
-
"
|
293
|
+
"text": "Περιοδικό Πνευματική Ζωή",
|
222
294
|
"b_id": "6770"
|
223
295
|
},
|
224
|
-
"publication_year": null,
|
225
|
-
"pages": "48",
|
226
296
|
"isbn": null,
|
227
297
|
"isbn_13": null,
|
228
|
-
"status": "Κυκλοφορεί",
|
229
|
-
"price": null,
|
230
298
|
"award": [
|
231
299
|
|
232
300
|
],
|
@@ -238,7 +306,27 @@
|
|
238
306
|
"b_id": "1459"
|
239
307
|
}
|
240
308
|
],
|
241
|
-
"b_id": "77381"
|
309
|
+
"b_id": "77381",
|
310
|
+
"publication": {
|
311
|
+
"year": null,
|
312
|
+
"version": null,
|
313
|
+
"place": "Αθήνα"
|
314
|
+
},
|
315
|
+
"format": "Βιβλίο",
|
316
|
+
"original_language": null,
|
317
|
+
"original_title": null,
|
318
|
+
"price": null,
|
319
|
+
"availability": "Κυκλοφορεί",
|
320
|
+
"last_update": null,
|
321
|
+
"series": {
|
322
|
+
"name": "Νεοελληνικά Αφιερώματα",
|
323
|
+
"volume": "21"
|
324
|
+
},
|
325
|
+
"physical_description": {
|
326
|
+
"pages": "48",
|
327
|
+
"size": "24x17",
|
328
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
329
|
+
}
|
242
330
|
},
|
243
331
|
{
|
244
332
|
"title": "Ο όλεθρος της πυραμίδος Καμπαλά",
|
@@ -259,15 +347,11 @@
|
|
259
347
|
]
|
260
348
|
},
|
261
349
|
"publisher": {
|
262
|
-
"
|
350
|
+
"text": "Μπίμπης Στερέωμα",
|
263
351
|
"b_id": "244"
|
264
352
|
},
|
265
|
-
"publication_year": null,
|
266
|
-
"pages": "269",
|
267
353
|
"isbn": null,
|
268
354
|
"isbn_13": null,
|
269
|
-
"status": "Κυκλοφορεί",
|
270
|
-
"price": "6,85",
|
271
355
|
"award": [
|
272
356
|
|
273
357
|
],
|
@@ -279,7 +363,25 @@
|
|
279
363
|
"b_id": "179"
|
280
364
|
}
|
281
365
|
],
|
282
|
-
"b_id": "46856"
|
366
|
+
"b_id": "46856",
|
367
|
+
"publication": {
|
368
|
+
"year": null,
|
369
|
+
"version": null,
|
370
|
+
"place": "Θεσσαλονίκη"
|
371
|
+
},
|
372
|
+
"format": "Βιβλίο",
|
373
|
+
"original_language": null,
|
374
|
+
"original_title": null,
|
375
|
+
"price": "6,85",
|
376
|
+
"availability": "Κυκλοφορεί",
|
377
|
+
"last_update": null,
|
378
|
+
"series": {
|
379
|
+
},
|
380
|
+
"physical_description": {
|
381
|
+
"pages": "269",
|
382
|
+
"size": "21x14",
|
383
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
384
|
+
}
|
283
385
|
},
|
284
386
|
{
|
285
387
|
"title": "Η επανάσταση κατά της νέας τάξης",
|
@@ -294,15 +396,11 @@
|
|
294
396
|
"contributors": {
|
295
397
|
},
|
296
398
|
"publisher": {
|
297
|
-
"
|
399
|
+
"text": "Μπίμπης Στερέωμα",
|
298
400
|
"b_id": "244"
|
299
401
|
},
|
300
|
-
"publication_year": null,
|
301
|
-
"pages": "71",
|
302
402
|
"isbn": null,
|
303
403
|
"isbn_13": null,
|
304
|
-
"status": "Κυκλοφορεί",
|
305
|
-
"price": "3,73",
|
306
404
|
"award": [
|
307
405
|
|
308
406
|
],
|
@@ -314,7 +412,25 @@
|
|
314
412
|
"b_id": "2336"
|
315
413
|
}
|
316
414
|
],
|
317
|
-
"b_id": "46763"
|
415
|
+
"b_id": "46763",
|
416
|
+
"publication": {
|
417
|
+
"year": null,
|
418
|
+
"version": null,
|
419
|
+
"place": "Θεσσαλονίκη"
|
420
|
+
},
|
421
|
+
"format": "Βιβλίο",
|
422
|
+
"original_language": null,
|
423
|
+
"original_title": null,
|
424
|
+
"price": "3,73",
|
425
|
+
"availability": "Κυκλοφορεί",
|
426
|
+
"last_update": null,
|
427
|
+
"series": {
|
428
|
+
},
|
429
|
+
"physical_description": {
|
430
|
+
"pages": "71",
|
431
|
+
"size": "21x14",
|
432
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
433
|
+
}
|
318
434
|
},
|
319
435
|
{
|
320
436
|
"title": "Επιχείρησις: Μαργαριτάρια",
|
@@ -329,15 +445,11 @@
|
|
329
445
|
"contributors": {
|
330
446
|
},
|
331
447
|
"publisher": {
|
332
|
-
"
|
448
|
+
"text": "Δωδώνη Εκδοτική ΕΠΕ",
|
333
449
|
"b_id": "1"
|
334
450
|
},
|
335
|
-
"publication_year": null,
|
336
|
-
"pages": "188",
|
337
451
|
"isbn": "960-248-541-8",
|
338
452
|
"isbn_13": "978-960-248-541-5",
|
339
|
-
"status": "Κυκλοφορεί",
|
340
|
-
"price": "10,60",
|
341
453
|
"award": [
|
342
454
|
|
343
455
|
],
|
@@ -349,7 +461,25 @@
|
|
349
461
|
"b_id": "1309"
|
350
462
|
}
|
351
463
|
],
|
352
|
-
"b_id": "33301"
|
464
|
+
"b_id": "33301",
|
465
|
+
"publication": {
|
466
|
+
"year": null,
|
467
|
+
"version": null,
|
468
|
+
"place": "Αθήνα"
|
469
|
+
},
|
470
|
+
"format": "Βιβλίο",
|
471
|
+
"original_language": null,
|
472
|
+
"original_title": null,
|
473
|
+
"price": "10,60",
|
474
|
+
"availability": "Κυκλοφορεί",
|
475
|
+
"last_update": null,
|
476
|
+
"series": {
|
477
|
+
},
|
478
|
+
"physical_description": {
|
479
|
+
"pages": "188",
|
480
|
+
"size": "20x13",
|
481
|
+
"cover_type": "Μαλακό εξώφυλλο"
|
482
|
+
}
|
353
483
|
}
|
354
484
|
]
|
355
|
-
}
|
485
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bookshark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.
|
4
|
+
version: 1.0.0.pre.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitris Klisiaris
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -36,14 +36,14 @@ dependencies:
|
|
36
36
|
requirements:
|
37
37
|
- - "~>"
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version: '
|
39
|
+
version: '4.0'
|
40
40
|
type: :runtime
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
44
|
- - "~>"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
46
|
+
version: '4.0'
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: json
|
49
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,6 +137,7 @@ files:
|
|
137
137
|
- lib/bookshark/crawlers/publisher_crawler.rb
|
138
138
|
- lib/bookshark/extractors/author_extractor.rb
|
139
139
|
- lib/bookshark/extractors/base.rb
|
140
|
+
- lib/bookshark/extractors/bibliographical_book_extractor.rb
|
140
141
|
- lib/bookshark/extractors/book_extractor.rb
|
141
142
|
- lib/bookshark/extractors/category_extractor.rb
|
142
143
|
- lib/bookshark/extractors/publisher_extractor.rb
|