gared 0.0.23 → 0.0.27

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
4
- data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
3
+ metadata.gz: e8dcff1820d0b68f25346b0899c3417eb0aa758ef282c65bf008aa0c36e5dd2a
4
+ data.tar.gz: 21277a0b51dbd03c75598e1d5ca37c09875d32e62e668facec2c82f335f3c78c
5
5
  SHA512:
6
- metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
7
- data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
6
+ metadata.gz: d37c27081fb9385373d0336ac2d8d3f270e35c520ae180bfcea85e51f5fe13ae157fe6c2d8c97407890f0a4f899c8f416a757941470e3cc0cb8471e65ce22cca
7
+ data.tar.gz: 86f92c9ca4609bdeb31180bc6e63f46c76f212bc5665b3acd6997510cb91459b5a11356b44be07f707a626d512074f19c466d9cc085caf38f2e8fde0230a2604
data/lib/gared/googlebooks.rb CHANGED
@@ -5,9 +5,13 @@ module Gared
5
5
  def initialize(api_key, page_size = '40')
6
6
  @options = {api_key: api_key, maxResults: page_size}
7
7
  end
8
-
8
+ def uri_escape(s)
9
+ p = URI::Parser.new
10
+ return p.escape(s)
11
+ end
12
+
9
13
  def query_publications_by_person(person, ctx = nil)
10
- url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{URI.escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
14
+ url = "https://www.googleapis.com/books/v1/volumes?q=inauthor:#{uri_escape(person)}&filter=full&key=#{@options[:api_key]}&maxResults=#{@options[:maxResults]}"
11
15
  resp = JSON.parse(RestClient.get(url))
12
16
 
13
17
  total = resp['totalItems']
data/lib/gared/nli_api.rb CHANGED
@@ -5,7 +5,11 @@ module Gared
5
5
  def initialize(url, api_key)
6
6
  @options = {url: url, api_key: api_key}
7
7
  end
8
-
8
+ def uri_escape(s)
9
+ p = URI::Parser.new
10
+ return p.escape(s)
11
+ end
12
+
9
13
  def query_persons(q)
10
14
  end
11
15
 
@@ -36,15 +40,15 @@ module Gared
36
40
  ret = []
37
41
  begin
38
42
  # first run obtain counts for the query
39
- escaped_person = URI.escape(person)
40
- url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&count_mode=true"
43
+ escaped_person = uri_escape(person)
44
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&count_mode=true"
41
45
  json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
42
46
  total = json['countInfos']['total']
43
47
  # then start loading the results
44
48
  result_page = 1
45
49
  recs = []
46
50
  while recs.length < total
47
- url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=books&result_page=#{result_page}"
51
+ url = @options[:url]+"?api_key=#{@options[:api_key]}&query=creator,contains,#{escaped_person},AND;language,exact,heb&sort_field=title&material_type=book&result_page=#{result_page}"
48
52
  puts "DBG: retrieving results page #{result_page}"
49
53
  json = JSON.parse(RestClient::Resource.new(url,verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
50
54
  recs += json
@@ -79,7 +83,7 @@ module Gared
79
83
  end
80
84
  end
81
85
  # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
82
- # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
86
+ # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=book
83
87
  rescue Exception
84
88
  puts $!
85
89
  end
data/lib/gared/primo.rb CHANGED
@@ -6,7 +6,11 @@ module Gared
6
6
  def initialize(url, institution)
7
7
  @options = {url: url, institution: institution}
8
8
  end
9
-
9
+ def uri_escape(s)
10
+ p = URI::Parser.new
11
+ return p.escape(s)
12
+ end
13
+
10
14
  def query_persons(q)
11
15
  end
12
16
 
@@ -23,14 +27,14 @@ module Gared
23
27
  def query_publications_by_person(person, ctx = nil)
24
28
  ret = []
25
29
  begin
26
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
30
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
27
31
  json = JSON.parse(RestClient.get(url))
28
32
  total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
29
33
  start_at = 1
30
34
  recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
31
35
  while recs.length < total
32
36
  start_at += 50
33
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
37
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
34
38
  json = JSON.parse(RestClient.get(url))
35
39
  recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
36
40
  sleep 1 # respect the server and avoid flood-blocking
data/lib/gared.rb CHANGED
@@ -9,5 +9,4 @@ module Gared
9
9
  require 'gared/googlebooks'
10
10
  require 'gared/nli_api'
11
11
  # ...
12
-
13
12
  end
data/test/test_gared.rb CHANGED
@@ -4,44 +4,40 @@ require 'gared'
4
4
 
5
5
  class GaredTest < Minitest::Test
6
6
 
7
- def test_nli_api_query_publicatios_by_person
7
+ def test_nli_api_query_publications_by_person
8
8
  if ENV['NLI_API_KEY'].nil?
9
9
  puts "skipping NLI API test because NLI_API_KEY envvar is not set"
10
10
  return
11
11
  end
12
12
  puts "Testing NLI API"
13
+ byebug
13
14
  nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
14
15
  refute_nil nli
15
- recs = nli.query_publications_by_person('ביאליק')
16
- #recs = nli.query_publications_by_person('אילנאה')
16
+ #recs = nli.query_publications_by_person('ביאליק')
17
+ recs = nli.query_publications_by_person('אילנאה')
17
18
  refute_nil recs
18
19
  refute_empty(recs)
19
20
  refute_empty(recs[0].title)
20
21
  end
21
22
 
22
- def test_primo_query_publicatios_by_person
23
- puts "Testing Primo"
24
- primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
25
- refute_nil primo
26
- recs = primo.query_publications_by_person('אילנאה')
27
- refute_nil recs
28
- refute_empty(recs)
29
- refute_empty(recs[0].title)
30
- end
23
+ # temporarily disabled until we find another Primo server to test against
24
+ # def test_primo_query_publications_by_person
25
+ # puts "Testing Primo"
26
+ # primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
27
+ # refute_nil primo
28
+ # recs = primo.query_publications_by_person('אילנאה')
29
+ # refute_nil recs
30
+ # refute_empty(recs)
31
+ # refute_empty(recs[0].title)
32
+ # end
31
33
 
32
- def test_aleph_query_publicatios_by_person
33
- puts "Testing Aleph"
34
- aleph = Gared::Aleph.new('aleph.nli.org.il', 9991, 'NNL01')
35
- refute_nil aleph
36
- recs = aleph.query_publications_by_person('אילנאה')
37
- refute_nil recs
38
- refute_empty(recs)
39
- refute_empty(recs[0].title)
40
- end
41
-
42
- def test_googlebooks_query_publicatios_by_person
34
+ def test_googlebooks_query_publications_by_person
35
+ if ENV['GOOGLE_API_KEY'].nil?
36
+ puts "skipping Google Books API test because GOOGLE_API_KEY envvar is not set"
37
+ return
38
+ end
43
39
  puts "Testing Google Books"
44
- gb = Gared::Googlebooks.new('AIzaSyCE2WFqTPdxAz1wv2f33hMfPWIF4tcocgM') # a key I made just for testing this gem. Please do not abuse.
40
+ gb = Gared::Googlebooks.new(ENV['GOOGLE_API_KEY'])
45
41
  refute_nil gb
46
42
  recs = gb.query_publications_by_person('מנדלי')
47
43
  refute_nil recs
@@ -49,7 +45,7 @@ class GaredTest < Minitest::Test
49
45
  refute_empty(recs[0].title)
50
46
  end
51
47
 
52
- def test_hebrewbooks_query_publicatios_by_person
48
+ def test_hebrewbooks_query_publications_by_person
53
49
  skip("Skipping testing Hebrewbooks because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
54
50
  puts "Testing Hebrewbooks"
55
51
  hb = Gared::Hebrewbooks.new
@@ -60,7 +56,7 @@ class GaredTest < Minitest::Test
60
56
  refute_empty(recs[0].title)
61
57
  end
62
58
 
63
- def test_idea_query_publicatios_by_person
59
+ def test_idea_query_publications_by_person
64
60
  skip("Skipping testing IDEA because chromedriver not found") unless `chromedriver -v` =~ /ChromeDriver/
65
61
  puts "Testing IDEA"
66
62
  idea = Gared::Idea.new('http://infocenters.co.il/RAANANA/')
@@ -74,4 +70,4 @@ class GaredTest < Minitest::Test
74
70
  assert_empty(recs)
75
71
  end
76
72
 
77
- end
73
+ end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.23
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-13 00:00:00.000000000 Z
11
+ date: 2023-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: zoom
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0.5'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0.5'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: watir
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -87,7 +73,6 @@ extensions: []
87
73
  extra_rdoc_files: []
88
74
  files:
89
75
  - lib/gared.rb
90
- - lib/gared/aleph.rb
91
76
  - lib/gared/googlebooks.rb
92
77
  - lib/gared/hebrewbooks.rb
93
78
  - lib/gared/holding.rb
@@ -102,7 +87,7 @@ homepage: https://gitlab.com/abartov/gared
102
87
  licenses:
103
88
  - MIT
104
89
  metadata: {}
105
- post_install_message:
90
+ post_install_message:
106
91
  rdoc_options: []
107
92
  require_paths:
108
93
  - lib
@@ -117,8 +102,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
102
  - !ruby/object:Gem::Version
118
103
  version: '0'
119
104
  requirements: []
120
- rubygems_version: 3.1.4
121
- signing_key:
105
+ rubygems_version: 3.2.32
106
+ signing_key:
122
107
  specification_version: 4
123
108
  summary: Scrape Hebrew bibliography sources
124
109
  test_files:
data/lib/gared/aleph.rb DELETED
@@ -1,80 +0,0 @@
1
- # Z39.50 values according to https://www.loc.gov/z3950/agency/bib1.html
2
- # and NLI info according to http://web.nli.org.il/sites/NLI/Hebrew/infochannels/librarians/Pages/Z39.50.aspx
3
- # Name of Database: NNL01
4
- # Host name: aleph.nli.org.il
5
- # IP address: 192.114.7.200
6
- # Port: 9991
7
- # Character-set: UTF-8
8
- # We support the following record syntaxes:
9
- # USMARC, OPAC, XML
10
- # We support the following word searches:
11
- # 1016, 1017, 1,1003, 1004,4,21,30,31,7,12,1007,1031,1007,5028,1033
12
- # We support the following phrase searches:
13
- # 7,12,1,1003,1004,4,21,15
14
- # We support the following sorts:
15
- # 1,4,30,31,1003
16
-
17
- module Gared
18
- require 'zoom'
19
- require 'nokogiri'
20
- class Aleph
21
- def initialize(host, port, database, syntax = 'USMARC')
22
- @options = {host: host, port: port, database: database, syntax: syntax}
23
- end
24
- def query_persons(q)
25
- end
26
-
27
- def query_person(person)
28
- end
29
-
30
- def query_publications(q)
31
- end
32
-
33
- def query_publication(publication)
34
- end
35
-
36
- def query_publications_by_person(person, ctx = nil)
37
- ZOOM::Connection.open(@options[:host], @options[:port]) do |conn|
38
- conn.database_name = @options[:database] # 'aleph.nli.org.il',9991
39
- conn.preferred_record_syntax = @options[:syntax]
40
- rset = conn.search("@attr 1=1003 @attr 2=3 @attr 4=1 \"#{person}\"")
41
- rr = rset.records
42
- return nil if rr.nil? or rr.empty?
43
- ret = []
44
- rr.each do |r|
45
- xml = Nokogiri::Slop(r.xml)
46
- xml.remove_namespaces! # keeps biting me :)
47
- # these scrapes are based on the National Library of Israel usage. No attempt to make it generic. :)
48
- p = Publication.new(ctx)
49
- begin
50
- p.author_line = xml.xpath('//datafield[@tag=\'100\']/subfield[@code=\'a\']')[0].text
51
- # puts "author: #{p.author_line}" # DEBUG
52
- rescue
53
- nil
54
- end
55
- begin
56
- p.title = xml.xpath('//datafield[@tag=\'245\']/subfield[@code=\'a\']')[0].text
57
- # puts "title: #{p.title}" # DEBUG
58
- rescue
59
- nil
60
- end
61
- begin
62
- p.notes = xml.xpath('//datafield[@tag=\'500\']/subfield[@code=\'a\']').collect{|note| note.text}.join("\n")
63
- rescue
64
- nil
65
- end
66
- begin
67
- h = Holding.new
68
- h.source_id = xml.xpath('//datafield[@tag=\'090\']/subfield[@code=\'a\']')[0].text
69
- h.source_name = @options[:database]
70
- p.add_holding(h)
71
- ret << p
72
- rescue
73
- nil # ignore records with no holdings; they may be archival files or other non-publications
74
- end #
75
- end
76
- return ret
77
- end
78
- end
79
- end
80
- end