gared 0.0.21 → 0.0.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b31908b108520003b9d2f7e4c37a3d7f8ccab124f577343b5e5a7b7f1473432
4
- data.tar.gz: 7ebd7bf7246c5c703f021f403460aa8b3ba1107842df23ebc1e6b0f40e89b9a5
3
+ metadata.gz: 3857f28b69b7cd0d1080208215cbf1bfc57a30a3af5df8162a98aee5b7558939
4
+ data.tar.gz: a3cf8fbe5018b902db45f6d70c8223f32101c6eb747b070a7f7dc56a8a37c48c
5
5
  SHA512:
6
- metadata.gz: 169547fe7caaf6f0777460b7c76cae17c6583a4b3c064e130dd6ddc33bf50bf7984576b216a3c8554dd08f6911a7b2f930e6b4d6f1b2713df729c2c3fc3617ed
7
- data.tar.gz: dea903b2aee797c53f62956645bdfa99737fa29eec4019f670a139cac34ee46dca97954b52b217594c0d314d9513b2f6f08f9cea8b474b94248101d18d3628f1
6
+ metadata.gz: 69f3b46c6fa96f5bf2c2f440fbc9bf93bbe57ec8ac61e84a9621289e67b919342558a323a9b14008d58f7472e48651ee43bd584879b011d78741a6212b7e81b1
7
+ data.tar.gz: f822f3a7f95e6c4ca38f78843a4309b764a624d053234bc880b672ec57cf011b88b3089f5a40836f328a8e1a366cb6660500609449a11eed16d5b0c7a86ac64e
@@ -5,9 +5,13 @@ module Gared
5
5
  def initialize(api_key, page_size = '40')
6
6
  @options = {api_key: api_key, maxResults: page_size}
7
7
  end
8
-
8
# Percent-encodes +s+ so it can be interpolated into a URL query value.
#
# Every character outside the URL-safe set is escaped, including the
# reserved delimiters (&, =, #, +) that URI::Parser#escape leaves as-is and
# that would otherwise split or truncate the query string. Spaces are
# emitted as %20 (not '+'), matching the previous output for ordinary names.
#
# @param s [#to_s] raw text, e.g. a person's name
# @return [String] the percent-encoded text
def uri_escape(s)
  # encode_www_form_component encodes a space as '+'; a literal '+' in the
  # input is already '%2B' at this point, so the gsub only touches spaces.
  URI.encode_www_form_component(s).gsub('+', '%20')
end
12
+
9
13
  def query_publications_by_person(person, ctx = nil)
10
- url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{URI.escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
14
+ url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{uri_escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
11
15
  resp = JSON.parse(RestClient.get(url))
12
16
 
13
17
  total = resp['totalItems']
data/lib/gared/jpress.rb CHANGED
@@ -1,35 +1,4 @@
1
1
  module Gared
2
2
  class Jpress
3
- require 'watir'
4
-
5
- def initialize
6
- @browser = Watir::Browser.new :chrome, options: {args: ['--no-sandbox', '--headless']}
7
- end
8
-
9
- def query_persons(q)
10
- end
11
-
12
- def query_person(person)
13
- end
14
-
15
- def query_publications(q)
16
- end
17
-
18
- def query_publication(publication)
19
- end
20
-
21
- # return in-memory Publication instances with associated Holdings
22
- def query_publications_by_person(person, ctx = nil)
23
- @browser.goto 'http://web.nli.org.il/sites/JPress/Hebrew/Pages/default.aspx'
24
- @browser.wait
25
-
26
- t = @browser.text_field(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_InputKeywords') # srsly, Micro$oft
27
- t.set(person)
28
- @browser.a(id: 'ctl00_PlaceHolderHeader_PlaceHolderSearchArea_ctl00_ctl00_ctl00_SD01C0892_go').click # "quick search" - not necessarily by author!
29
- @browser.wait
30
- ret = []
31
- begin
32
-
33
- end
34
3
  end
35
4
  end
@@ -0,0 +1,93 @@
1
+ require 'rest-client'
2
+
3
module Gared
  # Client for an NLI open-library style search endpoint
  # (e.g. https://api.nli.org.il/openlibrary/search).
  #
  # Only #query_publications_by_person is implemented; the other query_*
  # methods are placeholders that return nil.
  class Nli_Api
    # Namespace prefix prepended to bare Dublin Core element names.
    DC_NAMESPACE = 'http://purl.org/dc/elements/1.1/'.freeze

    # Matches any value that contains an http(s) URL.
    URL_PATTERN = %r{https?://\S}.freeze

    # @param url [String] base URL of the search endpoint
    # @param api_key [String] API key sent as the +api_key+ query parameter
    def initialize(url, api_key)
      @options = {url: url, api_key: api_key}
    end

    # Percent-encodes +s+ so it can be interpolated into a URL query value.
    # Reserved delimiters (&, =, #, +) are escaped too, so they cannot break
    # the query string; spaces are emitted as %20.
    #
    # @param s [#to_s] raw text, e.g. a person's name
    # @return [String] the percent-encoded text
    def uri_escape(s)
      # A literal '+' in the input is already '%2B' here, so the gsub only
      # rewrites the '+' that stands for a space.
      URI.encode_www_form_component(s).gsub('+', '%20')
    end

    # Not implemented.
    def query_persons(q)
    end

    # Not implemented.
    def query_person(person)
    end

    # Not implemented.
    def query_publications(q)
    end

    # Not implemented.
    def query_publication(publication)
    end

    # Reads one field of a result record as a display string.
    #
    # @param record [Hash, nil] a single result record
    # @param key [String] either a bare element name ('title'), which gets
    #   DC_NAMESPACE prepended, or a key starting with '@' ('@id'), which is
    #   used verbatim
    # @return [String] the String value as-is; for an Array value, the
    #   '@value' of each element joined with '; '; '' when the record is
    #   nil, the key is absent, or the value has another type
    def fetch_value_by_dc_key(record, key)
      return '' if record.nil?

      fullkey = key[0] == '@' ? key : DC_NAMESPACE + key
      stringify_value(record[fullkey])
    end

    # Returns in-memory Publication instances with associated Holdings.
    #
    # Asks the endpoint for the hit count first, then pages through the
    # results until that many records have been collected. Failures are
    # printed and swallowed: a failing record is skipped, a failing request
    # ends the query with whatever was built so far.
    #
    # @param person [String] name to search the creator field for
    # @param ctx [Object, nil] passed through to Publication.new
    # @return [Array<Publication>] possibly empty, never nil
    def query_publications_by_person(person, ctx = nil)
      ret = []
      begin
        escaped_person = uri_escape(person)
        # first run obtains counts for the query
        counts = fetch_json(search_url(escaped_person, 'count_mode=true'))
        total = counts.dig('countInfos', 'total').to_i
        # then start loading the results
        result_page = 1
        recs = []
        while recs.length < total
          puts "DBG: retrieving results page #{result_page}"
          page = fetch_json(search_url(escaped_person, "result_page=#{result_page}"))
          # An empty page means the server has nothing more to give; without
          # this guard a stale or inflated total would loop forever.
          break if page.empty?

          recs += page
          result_page += 1
          # sleep 1 # respect the server and avoid flood-blocking
        end
        recs.each do |r|
          begin
            ret << build_publication(r, ctx)
          rescue StandardError => e
            puts e
          end
        end
        # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
        # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
      rescue StandardError => e
        # StandardError rather than Exception, so Ctrl-C and exit still work.
        puts e
      end
      ret
    end

    private

    # Builds the search URL for Hebrew-language books by +escaped_person+,
    # with +extra+ appended as the final query parameter.
    def search_url(escaped_person, extra)
      "#{@options[:url]}?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&#{extra}"
    end

    # GETs +url+ and parses the response body as JSON.
    def fetch_json(url)
      # NOTE(review): TLS certificate verification is disabled here, which
      # exposes the API key and results to man-in-the-middle attacks.
      # Presumably a workaround for a server certificate problem -- confirm
      # whether it is still needed and switch to VERIFY_PEER if not.
      JSON.parse(RestClient::Resource.new(url, verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
    end

    # Converts a raw record value to a string (see #fetch_value_by_dc_key).
    def stringify_value(value)
      case value
      when String then value
      when Array then value.map { |x| x['@value'] }.join('; ')
      else ''
      end
    end

    # Collects every field value of +record+ that contains an http(s) URL,
    # so clients can determine whether a scanned object exists.
    def collect_urls(record)
      # Values are read directly: the record's keys are already fully
      # qualified, so they must not go through the namespace prefixing again.
      record.each_value.map { |v| stringify_value(v) }.select { |v| URL_PATTERN.match?(v) }
    end

    # Maps one result record to a Publication carrying a single Holding.
    def build_publication(r, ctx)
      pub = Publication.new(ctx)
      pub.title = fetch_value_by_dc_key(r, 'title')
      pub.author_line = fetch_value_by_dc_key(r, 'creator')
      pub.language = fetch_value_by_dc_key(r, 'language')
      pub.notes = "#{fetch_value_by_dc_key(r, 'format')}\n#{fetch_value_by_dc_key(r, 'subject')}"
      pub.publisher_line = fetch_value_by_dc_key(r, 'publisher')
      pub.pub_year = fetch_value_by_dc_key(r, 'non_standard_date')
      pub.source_id = fetch_value_by_dc_key(r, '@id')
      additional_urls = collect_urls(r)
      pub.additional_urls = additional_urls unless additional_urls.empty?
      h = Holding.new
      h.source_id = pub.source_id
      h.source_name = 'NLI API'
      h.location = fetch_value_by_dc_key(r, 'recordid')
      pub.add_holding(h)
      pub
    end
  end
end
data/lib/gared/primo.rb CHANGED
@@ -6,7 +6,11 @@ module Gared
6
6
  def initialize(url, institution)
7
7
  @options = {url: url, institution: institution}
8
8
  end
9
-
9
# Percent-encodes +s+ so it can be interpolated into a URL query value.
#
# Every character outside the URL-safe set is escaped, including the
# reserved delimiters (&, =, #, +) that URI::Parser#escape leaves as-is and
# that would otherwise split or truncate the query string. Spaces are
# emitted as %20 (not '+'), matching the previous output for ordinary names.
#
# @param s [#to_s] raw text, e.g. a person's name
# @return [String] the percent-encoded text
def uri_escape(s)
  # encode_www_form_component encodes a space as '+'; a literal '+' in the
  # input is already '%2B' at this point, so the gsub only touches spaces.
  URI.encode_www_form_component(s).gsub('+', '%20')
end
13
+
10
14
  def query_persons(q)
11
15
  end
12
16
 
@@ -23,14 +27,14 @@ module Gared
23
27
  def query_publications_by_person(person, ctx = nil)
24
28
  ret = []
25
29
  begin
26
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
30
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
27
31
  json = JSON.parse(RestClient.get(url))
28
32
  total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
29
33
  start_at = 1
30
34
  recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
31
35
  while recs.length < total
32
36
  start_at += 50
33
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
37
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
34
38
  json = JSON.parse(RestClient.get(url))
35
39
  recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
36
40
  sleep 1 # respect the server and avoid flood-blocking
@@ -55,7 +59,8 @@ module Gared
55
59
  h = Holding.new
56
60
  h.source_id = p.source_id
57
61
  h.source_name = 'Primo:'+@options[:institution]
58
- h.location = r['LIBRARIES']['LIBRARY'][0]['callNumber']
62
+
63
+ h.location = r['LIBRARIES']['LIBRARY'][0].nil? ? r['LIBRARIES']['LIBRARY']['callNumber'] : r['LIBRARIES']['LIBRARY'][0]['callNumber'] # there seem to be two cases, different between NLI and TAU, for example
59
64
  p.add_holding(h)
60
65
  ret << p
61
66
  rescue Exception
data/lib/gared.rb CHANGED
@@ -7,6 +7,6 @@ module Gared
7
7
  require 'gared/hebrewbooks'
8
8
  require 'gared/idea'
9
9
  require 'gared/googlebooks'
10
+ require 'gared/nli_api'
10
11
  # ...
11
-
12
12
  end
data/test/test_gared.rb CHANGED
@@ -1,31 +1,53 @@
1
+ require 'minitest/byebug' if ENV['DEBUG']
1
2
  require 'minitest/autorun'
2
3
  require 'gared'
3
4
 
4
5
  class GaredTest < Minitest::Test
5
6
 
6
- def test_primo_query_publicatios_by_person
7
- puts "Testing Primo"
8
- primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
9
- refute_nil primo
10
- recs = primo.query_publications_by_person('אילנאה')
7
+ def test_nli_api_query_publicatios_by_person
8
+ if ENV['NLI_API_KEY'].nil?
9
+ puts "skipping NLI API test because NLI_API_KEY envvar is not set"
10
+ return
11
+ end
12
+ puts "Testing NLI API"
13
+ nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
14
+ refute_nil nli
15
+ #recs = nli.query_publications_by_person('ביאליק')
16
+ recs = nli.query_publications_by_person('אילנאה')
11
17
  refute_nil recs
12
18
  refute_empty(recs)
13
19
  refute_empty(recs[0].title)
14
20
  end
15
21
 
16
- def test_aleph_query_publicatios_by_person
17
- puts "Testing Aleph"
18
- aleph = Gared::Aleph.new('aleph.nli.org.il', 9991, 'NNL01')
19
- refute_nil aleph
20
- recs = aleph.query_publications_by_person('אילנאה')
21
- refute_nil recs
22
- refute_empty(recs)
23
- refute_empty(recs[0].title)
24
- end
22
+ # temporarily disabled until we find another Primo server to test against
23
+ # def test_primo_query_publicatios_by_person
24
+ # puts "Testing Primo"
25
+ # primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
26
+ # refute_nil primo
27
+ # recs = primo.query_publications_by_person('אילנאה')
28
+ # refute_nil recs
29
+ # refute_empty(recs)
30
+ # refute_empty(recs[0].title)
31
+ # end
32
+
33
+ # temporarily disabled until we find another Aleph server to test against
34
+ # def test_aleph_query_publicatios_by_person
35
+ # puts "Testing Aleph"
36
+ # aleph = Gared::Aleph.new('aleph.nli.org.il', 9991, 'NNL01')
37
+ # refute_nil aleph
38
+ # recs = aleph.query_publications_by_person('אילנאה')
39
+ # refute_nil recs
40
+ # refute_empty(recs)
41
+ # refute_empty(recs[0].title)
42
+ # end
25
43
 
26
44
  def test_googlebooks_query_publicatios_by_person
45
+ if ENV['GOOGLE_API_KEY'].nil?
46
+ puts "skipping Google Books API test because GOOGLE_API_KEY envvar is not set"
47
+ return
48
+ end
27
49
  puts "Testing Google Books"
28
- gb = Gared::Googlebooks.new('AIzaSyCE2WFqTPdxAz1wv2f33hMfPWIF4tcocgM') # a key I made just for testing this gem. Please do not abuse.
50
+ gb = Gared::Googlebooks.new(ENV['GOOGLE_API_KEY'])
29
51
  refute_nil gb
30
52
  recs = gb.query_publications_by_person('מנדלי')
31
53
  refute_nil recs
@@ -58,4 +80,4 @@ class GaredTest < Minitest::Test
58
80
  assert_empty(recs)
59
81
  end
60
82
 
61
- end
83
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.21
4
+ version: 0.0.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-19 00:00:00.000000000 Z
11
+ date: 2022-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zoom
@@ -93,6 +93,7 @@ files:
93
93
  - lib/gared/holding.rb
94
94
  - lib/gared/idea.rb
95
95
  - lib/gared/jpress.rb
96
+ - lib/gared/nli_api.rb
96
97
  - lib/gared/person.rb
97
98
  - lib/gared/primo.rb
98
99
  - lib/gared/publication.rb
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
117
  - !ruby/object:Gem::Version
117
118
  version: '0'
118
119
  requirements: []
119
- rubyforge_project:
120
- rubygems_version: 2.7.7
120
+ rubygems_version: 3.1.4
121
121
  signing_key:
122
122
  specification_version: 4
123
123
  summary: Scrape Hebrew bibliography sources