bookshark 1.0.0.beta.3 → 1.0.0.beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a457d8fc6d56572ae211107913ac4ec260745cc
4
- data.tar.gz: 310068a2c23c898c732fa6243fe4697339e39dea
3
+ metadata.gz: a592c6e055f501c19f4a7dea23bad79446b6b28c
4
+ data.tar.gz: 797820d896b398e6294f9804a4a42f151682a027
5
5
  SHA512:
6
- metadata.gz: 81074707b067456d71340d23cfde9b6441dc69ffa860291025e5644d44698b10a065872b57d66ca1207275c5507815a95de329983f49d301a37095da0cf88290
7
- data.tar.gz: 292cf7099f3f62e264e0bc113747f611bec87d49ce87f8890e49c9f174cb89b3cf5899ff8915d0874d250307185bef1d8ecd71b9c466342f9d53269e8da292a6
6
+ metadata.gz: 1f8f7b1b0b0f7312549964153afe8198aab00b0acabdb9693c5e9f7c8242e90a19357fb3507397df6cd5b3278cb1ceb22d2b45dfb4646851e698b5cf2bfeb96d
7
+ data.tar.gz: 73e4f60098595c1e5b20101bf66c5d24d3cc2e2659c5776a7973635a9789149e5a6d2a2a4a69ff7af126314f25f9b2e31824fd131879f0b3b5939eff55462604
data/.gitignore CHANGED
@@ -8,7 +8,12 @@
8
8
  /pkg/
9
9
  /spec/reports/
10
10
  /tmp/
11
+ /lib/bookshark/storage/html_book_pages/
12
+ /lib/bookshark/storage/html_author_pages/
11
13
  /lib/bookshark/storage/html_publisher_pages/
14
+ /lib/bookshark/storage/json_book_pages/
15
+ /lib/bookshark/storage/json_author_pages/
16
+ /lib/bookshark/storage/json_publisher_pages/
12
17
  /lib/bookshark/logs/*.log
13
18
  *.bundle
14
19
  *.so
data/.travis.yml CHANGED
@@ -1,3 +1,5 @@
1
+ notifications:
2
+ slack: bibliography:gIEyQx7yEqCDHlXaBxBLsKlo
1
3
  language: ruby
2
4
  rvm:
3
5
  - 2.2.0
data/bookshark.gemspec CHANGED
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
25
25
  spec.add_dependency "json", "~> 1.8"
26
26
  spec.add_dependency "htmlentities", "~> 4.3"
27
27
 
28
- spec.add_development_dependency "bundler", "~> 1.9"
28
+ spec.add_development_dependency "bundler", ">= 1.6"
29
29
  spec.add_development_dependency "rake", "~> 10.0"
30
30
  spec.add_development_dependency 'rspec', "~> 3.2"
31
31
  end
data/lib/bookshark.rb CHANGED
@@ -119,6 +119,54 @@ module Bookshark
119
119
  return response
120
120
  end
121
121
 
122
+ def books_from_storage
123
+ extract_from_storage_and_save('book', 'html_book_pages', 'json_book_pages')
124
+ end
125
+
126
+ def authors_from_storage
127
+ extract_from_storage_and_save('author', 'html_author_pages', 'json_author_pages')
128
+ end
129
+
130
+ def publishers_from_storage
131
+ extract_from_storage_and_save('publisher', 'html_publisher_pages', 'json_publisher_pages')
132
+ end
133
+
134
+ def categories_from_storage
135
+ extract_from_storage_and_save('category', 'html_category_pages', 'json_category_pages')
136
+ end
137
+
138
+ def extract_from_storage_and_save(metadata_type, source_dir, target_dir)
139
+ list_directories(path: Bookshark.path_to_storage + '/' + source_dir).each do |dir|
140
+ dir_to_save = dir.gsub(source_dir, target_dir)
141
+
142
+ list_files(path: dir, extension: 'html', all:true).each do |file|
143
+ puts "Extracting from file: " + file.to_s
144
+
145
+ # Extract publisher metadata form local file.
146
+ options = {uri: file, format: 'pretty_json', local: true}
147
+
148
+ case metadata_type
149
+ when 'author'
150
+ record = author(options)
151
+ when 'publisher'
152
+ record = publisher(options)
153
+ when 'book'
154
+ record = book(options)
155
+ when 'category'
156
+ record = category(options)
157
+ end
158
+
159
+ # Prepare a path to save the new file.
160
+ filename = File.basename(file,".*")
161
+ path_to_save = "#{dir_to_save}#{filename}.json"
162
+
163
+ # Save to file.
164
+ save_to("#{path_to_save}", record)
165
+
166
+ end # unless File.directory?(dir_to_save) # if dir.end_with? '/195/'
167
+ end
168
+ end
169
+
122
170
  def parse_all_categories(will_save=false)
123
171
  # list_directories('raw_ddc_pages').each do |dir|
124
172
  # p dir
@@ -166,9 +214,8 @@ module Bookshark
166
214
  private
167
215
 
168
216
  def process_options(options = {}, caller = nil)
169
- # puts caller_locations(1,1)[0].label
170
- # options[:format] ||= @format
171
- puts caller
217
+ # puts "Called from method: " + caller.to_s
218
+
172
219
  id = options[:id]
173
220
 
174
221
  if id
@@ -28,7 +28,7 @@ module Biblionet
28
28
 
29
29
  headquarters = page.headquarters
30
30
  bookstores = page.bookstores
31
- bookstores['Έδρα'] = headquarters
31
+ bookstores['Έδρα'] = headquarters unless headquarters.all? {|k,v| v.nil? or v.empty?}
32
32
 
33
33
  publisher_hash = {}
34
34
  publisher_hash[:name] = page.name
@@ -97,7 +97,7 @@ module Biblionet
97
97
  # Change keys. Use the same as in bookstores.
98
98
  mappings = {"Διεύθυνση" => :address, "Τηλ" => :telephone, "FAX" => :fax, "E-mail" => :email, "Web site" => :website}
99
99
  headquarters_hash = Hash[headquarters_hash.map {|k, v| [mappings[k], v] }]
100
- headquarters_hash[:telephone] = [headquarters_hash[:telephone]] unless headquarters_hash[:telephone].kind_of?(Array)
100
+ headquarters_hash[:telephone] = [headquarters_hash[:telephone]] unless headquarters_hash[:telephone].kind_of?(Array) or headquarters_hash[:telephone].nil?
101
101
  headquarters_hash[:website] = headquarters_hash[:website].split(',').map(&:strip) if (headquarters_hash[:website] and headquarters_hash[:website].include? ',')
102
102
 
103
103
  return headquarters_hash
@@ -1,3 +1,3 @@
1
1
  module Bookshark
2
- VERSION = "1.0.0.beta.3"
2
+ VERSION = "1.0.0.beta.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bookshark
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.beta.3
4
+ version: 1.0.0.beta.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitris Klisiaris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-13 00:00:00.000000000 Z
11
+ date: 2015-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -76,16 +76,16 @@ dependencies:
76
76
  name: bundler
77
77
  requirement: !ruby/object:Gem::Requirement
78
78
  requirements:
79
- - - "~>"
79
+ - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: '1.9'
81
+ version: '1.6'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
- - - "~>"
86
+ - - ">="
87
87
  - !ruby/object:Gem::Version
88
- version: '1.9'
88
+ version: '1.6'
89
89
  - !ruby/object:Gem::Dependency
90
90
  name: rake
91
91
  requirement: !ruby/object:Gem::Requirement