gared 0.0.21 → 0.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b31908b108520003b9d2f7e4c37a3d7f8ccab124f577343b5e5a7b7f1473432
4
- data.tar.gz: 7ebd7bf7246c5c703f021f403460aa8b3ba1107842df23ebc1e6b0f40e89b9a5
3
+ metadata.gz: 3857f28b69b7cd0d1080208215cbf1bfc57a30a3af5df8162a98aee5b7558939
4
+ data.tar.gz: a3cf8fbe5018b902db45f6d70c8223f32101c6eb747b070a7f7dc56a8a37c48c
5
5
  SHA512:
6
- metadata.gz: 169547fe7caaf6f0777460b7c76cae17c6583a4b3c064e130dd6ddc33bf50bf7984576b216a3c8554dd08f6911a7b2f930e6b4d6f1b2713df729c2c3fc3617ed
7
- data.tar.gz: dea903b2aee797c53f62956645bdfa99737fa29eec4019f670a139cac34ee46dca97954b52b217594c0d314d9513b2f6f08f9cea8b474b94248101d18d3628f1
6
+ metadata.gz: 69f3b46c6fa96f5bf2c2f440fbc9bf93bbe57ec8ac61e84a9621289e67b919342558a323a9b14008d58f7472e48651ee43bd584879b011d78741a6212b7e81b1
7
+ data.tar.gz: f822f3a7f95e6c4ca38f78843a4309b764a624d053234bc880b672ec57cf011b88b3089f5a40836f328a8e1a366cb6660500609449a11eed16d5b0c7a86ac64e
@@ -5,9 +5,13 @@ module Gared
5
5
  def initialize(api_key, page_size = '40')
6
6
  @options = {api_key: api_key, maxResults: page_size}
7
7
  end
8
-
8
+ def uri_escape(s)
9
+ p = URI::Parser.new
10
+ return p.escape(s)
11
+ end
12
+
9
13
  def query_publications_by_person(person, ctx = nil)
10
- url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{URI.escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
14
+ url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{uri_escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
11
15
  resp = JSON.parse(RestClient.get(url))
12
16
 
13
17
  total = resp['totalItems']
data/lib/gared/jpress.rb CHANGED
@@ -1,35 +1,4 @@
1
1
  module Gared
2
2
  class Jpress
3
- require 'watir'
4
-
5
- def initialize
6
- @browser = Watir::Browser.new :chrome, options: {args: ['--no-sandbox', '--headless']}
7
- end
8
-
9
- def query_persons(q)
10
- end
11
-
12
- def query_person(person)
13
- end
14
-
15
- def query_publications(q)
16
- end
17
-
18
- def query_publication(publication)
19
- end
20
-
21
- # return in-memory Publication instances with associated Holdings
22
- def query_publications_by_person(person, ctx = nil)
23
- @browser.goto 'http://web.nli.org.il/sites/JPress/Hebrew/Pages/default.aspx'
24
- @browser.wait
25
-
26
- t = @browser.text_field(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_InputKeywords') # srsly, Micro$oft
27
- t.set(person)
28
- @browser.a(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_go').click # "quick search" - not necessarily by author!
29
- @browser.wait
30
- ret = []
31
- begin
32
-
33
- end
34
3
  end
35
4
  end
@@ -0,0 +1,93 @@
1
+ require 'rest-client'
2
+
3
+ module Gared
4
+ class Nli_Api
5
+ def initialize(url, api_key)
6
+ @options = {url: url, api_key: api_key}
7
+ end
8
+ def uri_escape(s)
9
+ p = URI::Parser.new
10
+ return p.escape(s)
11
+ end
12
+
13
+ def query_persons(q)
14
+ end
15
+
16
+ def query_person(person)
17
+ end
18
+
19
+ def query_publications(q)
20
+ end
21
+
22
+ def query_publication(publication)
23
+ end
24
+ def fetch_value_by_dc_key(record, key)
25
+ ret = ''
26
+ fullkey = key[0] == '@' ? key : 'http://purl.org/dc/elements/1.1/' + key
27
+ unless record.nil?
28
+ unless record[fullkey].nil?
29
+ if record[fullkey].class == String
30
+ ret = record[fullkey ]
31
+ elsif record[fullkey].class == Array
32
+ ret = record[fullkey].map{|x| x['@value'] }.join('; ')
33
+ end
34
+ end
35
+ end
36
+ ret
37
+ end
38
+ # return in-memory Publication instances with associated Holdings
39
+ def query_publications_by_person(person, ctx = nil)
40
+ ret = []
41
+ begin
42
+ # first run obtain counts for the query
43
+ escaped_person = uri_escape(person)
44
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&count_mode=true"
45
+ json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
46
+ total = json['countInfos']['total']
47
+ # then start loading the results
48
+ result_page = 1
49
+ recs = []
50
+ while recs.length < total
51
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&result_page=#{result_page}"
52
+ puts "DBG: retrieving results page #{result_page}"
53
+ json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
54
+ recs += json
55
+ result_page += 1
56
+ # sleep 1 # respect the server and avoid flood-blocking
57
+ end
58
+ recs.each do |r|
59
+ begin
60
+ p = Publication.new(ctx)
61
+ p.title = fetch_value_by_dc_key(r, 'title')
62
+ p.author_line = fetch_value_by_dc_key(r, 'creator')
63
+ p.language = fetch_value_by_dc_key(r, 'language')
64
+ p.notes = "#{fetch_value_by_dc_key(r, 'format')}\n#{fetch_value_by_dc_key(r, 'subject')}"
65
+ p.publisher_line = fetch_value_by_dc_key(r,'publisher')
66
+ p.pub_year = fetch_value_by_dc_key(r, 'non_standard_date')
67
+ p.source_id = fetch_value_by_dc_key(r, '@id')
68
+ # collect additional URLS from record, for clients to be able to determine whether a scanned object exists
69
+ additional_urls = []
70
+ r.keys.each do |key|
71
+ val = fetch_value_by_dc_key(r, key)
72
+ additional_urls << val if val =~ /https?:[^\s]\/\//
73
+ end
74
+ p.additional_urls = additional_urls if additional_urls.length > 0
75
+ h = Holding.new
76
+ h.source_id = p.source_id
77
+ h.source_name = 'NLI API'
78
+ h.location = fetch_value_by_dc_key(r, 'recordid')
79
+ p.add_holding(h)
80
+ ret << p
81
+ rescue Exception
82
+ puts $!
83
+ end
84
+ end
85
+ # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
86
+ # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
87
+ rescue Exception
88
+ puts $!
89
+ end
90
+ return ret
91
+ end
92
+ end
93
+ end
data/lib/gared/primo.rb CHANGED
@@ -6,7 +6,11 @@ module Gared
6
6
  def initialize(url, institution)
7
7
  @options = {url: url, institution: institution}
8
8
  end
9
-
9
+ def uri_escape(s)
10
+ p = URI::Parser.new
11
+ return p.escape(s)
12
+ end
13
+
10
14
  def query_persons(q)
11
15
  end
12
16
 
@@ -23,14 +27,14 @@ module Gared
23
27
  def query_publications_by_person(person, ctx = nil)
24
28
  ret = []
25
29
  begin
26
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
30
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
27
31
  json = JSON.parse(RestClient.get(url))
28
32
  total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
29
33
  start_at = 1
30
34
  recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
31
35
  while recs.length < total
32
36
  start_at += 50
33
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
37
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
34
38
  json = JSON.parse(RestClient.get(url))
35
39
  recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
36
40
  sleep 1 # respect the server and avoid flood-blocking
@@ -55,7 +59,8 @@ module Gared
55
59
  h = Holding.new
56
60
  h.source_id = p.source_id
57
61
  h.source_name = 'Primo:'+@options[:institution]
58
- h.location = r['LIBRARIES']['LIBRARY'][0]['callNumber']
62
+
63
+ h.location = r['LIBRARIES']['LIBRARY'][0].nil? ? r['LIBRARIES']['LIBRARY']['callNumber'] : r['LIBRARIES']['LIBRARY'][0]['callNumber'] # there seem to be two cases, different between NLI and TAU, for example
59
64
  p.add_holding(h)
60
65
  ret << p
61
66
  rescue Exception
data/lib/gared.rb CHANGED
@@ -7,6 +7,6 @@ module Gared
7
7
  require 'gared/hebrewbooks'
8
8
  require 'gared/idea'
9
9
  require 'gared/googlebooks'
10
+ require 'gared/nli_api'
10
11
  # ...
11
-
12
12
  end
data/test/test_gared.rb CHANGED
@@ -1,31 +1,53 @@
1
+ require 'minitest/byebug' if ENV['DEBUG']
1
2
  require 'minitest/autorun'
2
3
  require 'gared'
3
4
 
4
5
  class GaredTest < Minitest::Test
5
6
 
6
- def test_primo_query_publicatios_by_person
7
- puts "Testing Primo"
8
- primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
9
- refute_nil primo
10
- recs = primo.query_publications_by_person('אילנאה')
7
+ def test_nli_api_query_publicatios_by_person
8
+ if ENV['NLI_API_KEY'].nil?
9
+ puts "skipping NLI API test because NLI_API_KEY envvar is not set"
10
+ return
11
+ end
12
+ puts "Testing NLI API"
13
+ nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
14
+ refute_nil nli
15
+ #recs = nli.query_publications_by_person('ביאליק')
16
+ recs = nli.query_publications_by_person('אילנאה')
11
17
  refute_nil recs
12
18
  refute_empty(recs)
13
19
  refute_empty(recs[0].title)
14
20
  end
15
21
 
16
- def test_aleph_query_publicatios_by_person
17
- puts "Testing Aleph"
18
- aleph = Gared::Aleph.new('aleph.nli.org.il', 9991, 'NNL01')
19
- refute_nil aleph
20
- recs = aleph.query_publications_by_person('אילנאה')
21
- refute_nil recs
22
- refute_empty(recs)
23
- refute_empty(recs[0].title)
24
- end
22
+ # temporarily disabled until we find another Primo server to test against
23
+ # def test_primo_query_publicatios_by_person
24
+ # puts "Testing Primo"
25
+ # primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
26
+ # refute_nil primo
27
+ # recs = primo.query_publications_by_person('אילנאה')
28
+ # refute_nil recs
29
+ # refute_empty(recs)
30
+ # refute_empty(recs[0].title)
31
+ # end
32
+
33
+ # temporarily disabled until we find another Aleph server to test against
34
+ # def test_aleph_query_publicatios_by_person
35
+ # puts "Testing Aleph"
36
+ # aleph = Gared::Aleph.new('aleph.nli.org.il', 9991, 'NNL01')
37
+ # refute_nil aleph
38
+ # recs = aleph.query_publications_by_person('אילנאה')
39
+ # refute_nil recs
40
+ # refute_empty(recs)
41
+ # refute_empty(recs[0].title)
42
+ # end
25
43
 
26
44
  def test_googlebooks_query_publicatios_by_person
45
+ if ENV['GOOGLE_API_KEY'].nil?
46
+ puts "skipping Google Books API test because GOOGLE_API_KEY envvar is not set"
47
+ return
48
+ end
27
49
  puts "Testing Google Books"
28
- gb = Gared::Googlebooks.new('AIzaSyCE2WFqTPdxAz1wv2f33hMfPWIF4tcocgM') # a key I made just for testing this gem. Please do not abuse.
50
+ gb = Gared::Googlebooks.new(ENV['GOOGLE_API_KEY'])
29
51
  refute_nil gb
30
52
  recs = gb.query_publications_by_person('מנדלי')
31
53
  refute_nil recs
@@ -58,4 +80,4 @@ class GaredTest < Minitest::Test
58
80
  assert_empty(recs)
59
81
  end
60
82
 
61
- end
83
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.21
4
+ version: 0.0.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-19 00:00:00.000000000 Z
11
+ date: 2022-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zoom
@@ -93,6 +93,7 @@ files:
93
93
  - lib/gared/holding.rb
94
94
  - lib/gared/idea.rb
95
95
  - lib/gared/jpress.rb
96
+ - lib/gared/nli_api.rb
96
97
  - lib/gared/person.rb
97
98
  - lib/gared/primo.rb
98
99
  - lib/gared/publication.rb
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
117
  - !ruby/object:Gem::Version
117
118
  version: '0'
118
119
  requirements: []
119
- rubyforge_project:
120
- rubygems_version: 2.7.7
120
+ rubygems_version: 3.1.4
121
121
  signing_key:
122
122
  specification_version: 4
123
123
  summary: Scrape Hebrew bibliography sources