gared 0.0.22 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 12ad7a1d34ae412a0828d5a9ea7297743d631781248f2ecbcfe646e316cfc42e
4
- data.tar.gz: 4c2dab22f6984e50971164a9c984b6034053e0c86a717130afe07b7c743b5fcf
3
+ metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
4
+ data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
5
5
  SHA512:
6
- metadata.gz: 3825a5a4bac92b0bb92f3b5fe5955c2a7780a489ceaae7718fc4eead41fa9788b567b2359a8a75efa51c88b8d65623580d27b93e3e29d95f41858ea9b76e66e9
7
- data.tar.gz: 299626204c6f0b3f8cffdb46cc0548c41d7583fc094e3d0455ccead2f2198c4ec9805914036c8c5144874d6487ce980024ce52d016ce3955336351123204a9a2
6
+ metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
7
+ data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
data/lib/gared.rb CHANGED
@@ -7,6 +7,7 @@ module Gared
7
7
  require 'gared/hebrewbooks'
8
8
  require 'gared/idea'
9
9
  require 'gared/googlebooks'
10
+ require 'gared/nli_api'
10
11
  # ...
11
12
 
12
13
  end
data/lib/gared/jpress.rb CHANGED
@@ -1,35 +1,4 @@
1
1
  module Gared
2
2
  class Jpress
3
- require 'watir'
4
-
5
- def initialize
6
- @browser = Watir::Browser.new :chrome, options: {args: ['--no-sandbox', '--headless']}
7
- end
8
-
9
- def query_persons(q)
10
- end
11
-
12
- def query_person(person)
13
- end
14
-
15
- def query_publications(q)
16
- end
17
-
18
- def query_publication(publication)
19
- end
20
-
21
- # return in-memory Publication instances with associated Holdings
22
- def query_publications_by_person(person, ctx = nil)
23
- @browser.goto 'http://web.nli.org.il/sites/JPress/Hebrew/Pages/default.aspx'
24
- @browser.wait
25
-
26
- t = @browser.text_field(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_InputKeywords') # srsly, Micro$oft
27
- t.set(person)
28
- @browser.a(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_go').click # "quick search" - not necessarily by author!
29
- @browser.wait
30
- ret = []
31
- begin
32
-
33
- end
34
3
  end
35
4
  end
@@ -0,0 +1,89 @@
1
+ require 'rest-client'
2
+
3
module Gared
  # Client for a search endpoint that is queried over HTTP with an API key and
  # answers with JSON (the test suite points it at the National Library of
  # Israel "openlibrary/search" URL).
  #
  # Records are treated as hashes whose keys are either "@"-prefixed keywords
  # (such as "@id") or names in the Dublin Core elements namespace, and whose
  # values are either plain strings or arrays of {"@value" => ...} hashes.
  class Nli_Api
    # Namespace prepended to bare Dublin Core element names such as "title".
    DC_NAMESPACE = 'http://purl.org/dc/elements/1.1/'.freeze

    # Matches a value that contains an http(s) URL.
    URL_PATTERN = %r{https?://\S}

    # @param url [String] base URL of the search endpoint
    # @param api_key [String] key sent as the `api_key` query parameter
    def initialize(url, api_key)
      @options = { url: url, api_key: api_key }
    end

    # Not implemented for this source; returns nil.
    def query_persons(q)
    end

    # Not implemented for this source; returns nil.
    def query_person(person)
    end

    # Not implemented for this source; returns nil.
    def query_publications(q)
    end

    # Not implemented for this source; returns nil.
    def query_publication(publication)
    end

    # Reads one field of a record as a single string.
    #
    # @param record [Hash, nil] one record from the API
    # @param key [String] a bare Dublin Core element name ("title"), an
    #   "@"-prefixed keyword ("@id"), or an already fully qualified key
    # @return [String] the string value as-is; for an array value, the
    #   "@value" of each entry joined with "; " (entries without a value are
    #   dropped); '' when the record is nil, the key is absent, or the value
    #   has any other type
    def fetch_value_by_dc_key(record, key)
      return '' if record.nil?

      value = record[qualified_key(key)]
      case value
      when String
        value
      when Array
        value.map { |item| item.is_a?(Hash) ? item['@value'] : item }.compact.join('; ')
      else
        ''
      end
    end

    # Returns in-memory Publication instances with associated Holdings.
    #
    # Best effort: errors are printed and whatever was built so far is
    # returned, so the result may be empty or partial.
    #
    # @param person [String] creator name to search for
    # @param ctx passed through unchanged to Publication.new
    # @return [Array<Publication>]
    def query_publications_by_person(person, ctx = nil)
      ret = []
      begin
        query = search_url(person)
        # first run obtains counts for the query
        total = fetch_json("#{query}&count_mode=true")['countInfos']['total'].to_i
        # then load the results page by page
        fetch_records(query, total).each do |rec|
          begin
            ret << build_publication(rec, ctx)
          rescue StandardError => e
            # one malformed record must not discard the others
            puts e
          end
        end
        # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
        # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
      rescue StandardError => e
        # StandardError rather than Exception, so Interrupt/SystemExit still propagate
        puts e
      end
      ret
    end

    private

    # Expands a bare element name into its namespaced key. Keys that are
    # "@"-keywords or already contain a scheme (as the record's own keys do)
    # are returned untouched, so they are not prefixed a second time.
    def qualified_key(key)
      name = key.to_s
      return name if name.start_with?('@') || name.include?('://')

      DC_NAMESPACE + name
    end

    # Builds the query URL shared by the count request and the page requests.
    def search_url(person)
      # URI.escape no longer exists in Ruby 3; encode_www_form_component
      # writes a space as "+", which is turned back into "%20" here.
      escaped_person = URI.encode_www_form_component(person).gsub('+', '%20')
      "#{@options[:url]}?api_key=#{@options[:api_key]}" \
        "&query=creator,contains,#{escaped_person},AND;language,exact,heb" \
        '&sort_field=title&material_type=books'
    end

    # Performs a GET request and parses the body as JSON.
    def fetch_json(url)
      # NOTE(review): certificate verification is disabled, as in the original
      # code; confirm whether the endpoint really needs this before keeping it.
      JSON.parse(RestClient::Resource.new(url, verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
    end

    # Loads result pages until `total` records are collected, or until a page
    # is empty or not an array, so an overstated total cannot loop forever.
    def fetch_records(query, total)
      recs = []
      result_page = 1
      while recs.length < total
        puts "DBG: retrieving results page #{result_page}"
        page = fetch_json("#{query}&result_page=#{result_page}")
        break unless page.is_a?(Array) && !page.empty?

        recs.concat(page)
        result_page += 1
        # sleep 1 # respect the server and avoid flood-blocking
      end
      recs
    end

    # Maps one record onto a Publication carrying a single Holding.
    def build_publication(rec, ctx)
      pub = Publication.new(ctx)
      pub.title = fetch_value_by_dc_key(rec, 'title')
      pub.author_line = fetch_value_by_dc_key(rec, 'creator')
      pub.language = fetch_value_by_dc_key(rec, 'language')
      pub.notes = "#{fetch_value_by_dc_key(rec, 'format')}\n#{fetch_value_by_dc_key(rec, 'subject')}"
      pub.publisher_line = fetch_value_by_dc_key(rec, 'publisher')
      pub.pub_year = fetch_value_by_dc_key(rec, 'non_standard_date')
      pub.source_id = fetch_value_by_dc_key(rec, '@id')
      # collect additional URLs from the record, for clients to be able to
      # determine whether a scanned object exists
      urls = rec.keys.flat_map { |key| fetch_value_by_dc_key(rec, key).split('; ') }.grep(URL_PATTERN)
      pub.additional_urls = urls unless urls.empty?

      holding = Holding.new
      holding.source_id = pub.source_id
      holding.source_name = 'NLI API'
      holding.location = fetch_value_by_dc_key(rec, 'recordid')
      pub.add_holding(holding)
      pub
    end
  end
end
data/test/test_gared.rb CHANGED
@@ -1,8 +1,24 @@
1
+ require 'minitest/byebug' if ENV['DEBUG']
1
2
  require 'minitest/autorun'
2
3
  require 'gared'
3
4
 
4
5
  class GaredTest < Minitest::Test
5
6
 
7
# Live integration test for Gared::Nli_Api#query_publications_by_person.
# It queries the real endpoint, so it needs an API key in the NLI_API_KEY
# environment variable. Without one the test is reported as skipped rather
# than returning early, which would be counted as a pass.
def test_nli_api_query_publicatios_by_person
  skip 'skipping NLI API test because NLI_API_KEY envvar is not set' if ENV['NLI_API_KEY'].nil?

  puts "Testing NLI API"
  nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
  refute_nil nli
  recs = nli.query_publications_by_person('ביאליק')
  # alternative query kept from the original for manual runs:
  #recs = nli.query_publications_by_person('אילנאה')
  refute_nil recs
  refute_empty(recs)
  refute_empty(recs[0].title)
end
21
+
6
22
  def test_primo_query_publicatios_by_person
7
23
  puts "Testing Primo"
8
24
  primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.22
4
+ version: 0.0.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-19 00:00:00.000000000 Z
11
+ date: 2021-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zoom
@@ -93,6 +93,7 @@ files:
93
93
  - lib/gared/holding.rb
94
94
  - lib/gared/idea.rb
95
95
  - lib/gared/jpress.rb
96
+ - lib/gared/nli_api.rb
96
97
  - lib/gared/person.rb
97
98
  - lib/gared/primo.rb
98
99
  - lib/gared/publication.rb
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
117
  - !ruby/object:Gem::Version
117
118
  version: '0'
118
119
  requirements: []
119
- rubyforge_project:
120
- rubygems_version: 2.7.7
120
+ rubygems_version: 3.1.4
121
121
  signing_key:
122
122
  specification_version: 4
123
123
  summary: Scrape Hebrew bibliography sources