gared 0.0.18 → 0.0.23

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad89b9fa713cb95d420566a0a9735ad839907661996bfa0e816a87391010e8b6
4
- data.tar.gz: f450165383f2e8c030b4e5e25ade74d5a927009aa2a6c92f174c68e76e5003c1
3
+ metadata.gz: cd1e7f535a0650e0251fec57b3572e49dda88b189b96fe1d1b630acc5e147bb4
4
+ data.tar.gz: b606f6169bdf2cac88f5d29cec6bf4e5464a639bc73b655a24c61aecf7593cd1
5
5
  SHA512:
6
- metadata.gz: e5f35bc4cdb1efe388224d5b0e7973eb40fe7c3e9d241961562b87998e56a058509b9040e94752177b1df3df56ce8aec4617953af7d253bbbfa6e338285c5cba
7
- data.tar.gz: 6b070deefebb23a21d191155bc1e5ff8bf80a60261a74a657486ce2fa44d8244fbeed3ec19a2ae9a196d54649e23a5dced67dace6daaaa2ba1f68c1a548cdf49
6
+ metadata.gz: b5c082ab85a8a7fb4cf5c23af7ebc35584751ebcaf88defc1b4f8c8d808d5cb3ff5d213db4b07687bfae49fda11b12f6963c9c6d2bd2e6b1d2e5edcc487e6e4b
7
+ data.tar.gz: a4415463242669077a504252094c2e93f63469fe6fcd4faeeee7a5cceb852b5325608dd5255991c79f04d725856ac61296023301c39af0a4f48288dbe1ddad70
data/lib/gared.rb CHANGED
@@ -7,6 +7,7 @@ module Gared
7
7
  require 'gared/hebrewbooks'
8
8
  require 'gared/idea'
9
9
  require 'gared/googlebooks'
10
+ require 'gared/nli_api'
10
11
  # ...
11
12
 
12
13
  end
data/lib/gared/holding.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module Gared
2
2
  class Holding
3
- attr_accessor :source_id, :source_name, :scan_url
3
+ attr_accessor :source_id, :source_name, :scan_url, :location
4
4
 
5
5
  end
6
6
  end
@@ -0,0 +1,89 @@
1
+ require 'rest-client'
2
+
3
module Gared
  # Client for an NLI open-library style search API
  # (e.g. https://api.nli.org.il/openlibrary/search).
  #
  # Records returned by the API are hashes keyed by Dublin Core element URIs
  # (plus JSON-LD style '@' keys such as '@id'); this class converts them into
  # in-memory Publication objects with an attached Holding.
  class Nli_Api
    # Namespace prefix used for Dublin Core element keys in API records.
    DC_NAMESPACE = 'http://purl.org/dc/elements/1.1/'.freeze
    # Matches any value that contains an http(s) URL.
    URL_PATTERN = %r{https?://\S+}.freeze

    # @param url [String] base URL of the search endpoint
    # @param api_key [String] API key sent as the api_key query parameter
    def initialize(url, api_key)
      @options = { url: url, api_key: api_key }
    end

    # Placeholder: not implemented, returns nil.
    def query_persons(q)
    end

    # Placeholder: not implemented, returns nil.
    def query_person(person)
    end

    # Placeholder: not implemented, returns nil.
    def query_publications(q)
    end

    # Placeholder: not implemented, returns nil.
    def query_publication(publication)
    end

    # Extract a displayable string for +key+ from an API record.
    #
    # @param record [Hash, nil] a single record as parsed from the API's JSON
    # @param key [String] a short Dublin Core element name ('title'), an
    #   '@'-prefixed key ('@id'), or an already fully-qualified http(s) key
    # @return [String] the String value as-is; Array values joined with '; '
    #   (taking '@value' from Hash items); '' when the record is nil, the key
    #   is missing, or the value has any other type
    def fetch_value_by_dc_key(record, key)
      return '' if record.nil?

      value = record[full_dc_key(key)]
      case value
      when String then value
      when Array then value.map { |item| item.is_a?(Hash) ? item['@value'] : item }.join('; ')
      else ''
      end
    end

    # Return in-memory Publication instances with associated Holdings for all
    # Hebrew-language books whose creator contains +person+.
    #
    # This is best-effort: a failure on a single record skips that record, and a
    # failure of the search itself ends the query; in both cases the error
    # message is printed and whatever was collected so far is returned.
    #
    # @param person [String] creator name to search for
    # @param ctx [Object, nil] passed through unchanged to Publication.new
    # @return [Array<Publication>] possibly empty
    def query_publications_by_person(person, ctx = nil)
      ret = []
      begin
        fetch_all_records(person).each do |record|
          begin
            ret << build_publication(record, ctx)
          rescue StandardError => e
            puts e
          end
        end
        # TODO: also collect IIIF links for the *subset* of titles that have them, using the availability_type param. No way to get that in the above query -- the fields are not emitted.
        # the URL is like https://api.nli.org.il/openlibrary/search?api_key=(((KEY)))&query=title,contains,querystring&availability_type=online_and_api_access&material_type=books
      rescue StandardError => e
        puts e
      end
      ret
    end

    private

    # Expand a short Dublin Core element name to its full key; '@' keys and keys
    # that are already full http(s) URIs are returned untouched.
    def full_dc_key(key)
      key.start_with?('@', 'http://', 'https://') ? key : DC_NAMESPACE + key
    end

    # Build the search URL for +person+, appending +extra+ as the final query parameter.
    def search_url(person, extra)
      # Percent-encode the name so reserved characters (&, ;, comma) cannot break
      # the query; spaces are sent as %20 rather than '+'.
      escaped_person = URI.encode_www_form_component(person).gsub('+', '%20')
      "#{@options[:url]}?api_key=#{@options[:api_key]}" \
        "&query=creator,contains,#{escaped_person},AND;language,exact,heb" \
        "&sort_field=title&material_type=books&#{extra}"
    end

    # GET +url+ and parse the response body as JSON.
    def fetch_json(url)
      # NOTE(review): certificate verification is disabled here, which exposes the
      # API key to man-in-the-middle interception -- confirm whether this is still needed.
      JSON.parse(RestClient::Resource.new(url, verify_ssl: OpenSSL::SSL::VERIFY_NONE).get)
    end

    # Fetch every result record for +person+: one count-only request to learn the
    # total, then successive result pages until that many records are collected.
    def fetch_all_records(person)
      total = fetch_json(search_url(person, 'count_mode=true')).dig('countInfos', 'total').to_i
      records = []
      result_page = 1
      while records.length < total
        puts "DBG: retrieving results page #{result_page}"
        page = fetch_json(search_url(person, "result_page=#{result_page}"))
        # An empty or non-array page means no more results are coming; stop
        # instead of requesting pages forever.
        break unless page.is_a?(Array) && !page.empty?

        records.concat(page)
        result_page += 1
        # sleep 1 # respect the server and avoid flood-blocking
      end
      records
    end

    # Collect every value in +record+ that contains an http(s) URL, so clients
    # can determine whether a scanned object exists.
    def collect_urls(record)
      record.keys
            .map { |key| fetch_value_by_dc_key(record, key) }
            .select { |value| value.match?(URL_PATTERN) }
    end

    # Convert one API record into a Publication with a single 'NLI API' Holding.
    def build_publication(record, ctx)
      pub = Publication.new(ctx)
      pub.title = fetch_value_by_dc_key(record, 'title')
      pub.author_line = fetch_value_by_dc_key(record, 'creator')
      pub.language = fetch_value_by_dc_key(record, 'language')
      pub.notes = "#{fetch_value_by_dc_key(record, 'format')}\n#{fetch_value_by_dc_key(record, 'subject')}"
      pub.publisher_line = fetch_value_by_dc_key(record, 'publisher')
      pub.pub_year = fetch_value_by_dc_key(record, 'non_standard_date')
      pub.source_id = fetch_value_by_dc_key(record, '@id')

      additional_urls = collect_urls(record)
      pub.additional_urls = additional_urls unless additional_urls.empty?

      holding = Holding.new
      holding.source_id = pub.source_id
      holding.source_name = 'NLI API'
      holding.location = fetch_value_by_dc_key(record, 'recordid')
      pub.add_holding(holding)
      pub
    end
  end
end
data/lib/gared/primo.rb CHANGED
@@ -23,14 +23,14 @@ module Gared
23
23
  def query_publications_by_person(person, ctx = nil)
24
24
  ret = []
25
25
  begin
26
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books}&json=true"
26
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
27
27
  json = JSON.parse(RestClient.get(url))
28
28
  total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
29
29
  start_at = 1
30
30
  recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
31
31
  while recs.length < total
32
32
  start_at += 50
33
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books}&json=true"
33
+ url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{URI.escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
34
34
  json = JSON.parse(RestClient.get(url))
35
35
  recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
36
36
  sleep 1 # respect the server and avoid flood-blocking
@@ -55,6 +55,8 @@ module Gared
55
55
  h = Holding.new
56
56
  h.source_id = p.source_id
57
57
  h.source_name = 'Primo:'+@options[:institution]
58
+
59
+ h.location = r['LIBRARIES']['LIBRARY'][0].nil? ? r['LIBRARIES']['LIBRARY']['callNumber'] : r['LIBRARIES']['LIBRARY'][0]['callNumber'] # there seem to be two cases, different between NLI and TAU, for example
58
60
  p.add_holding(h)
59
61
  ret << p
60
62
  rescue Exception
@@ -67,4 +69,4 @@ module Gared
67
69
  return ret
68
70
  end
69
71
  end
70
- end
72
+ end
data/test/test_gared.rb CHANGED
@@ -1,8 +1,24 @@
1
+ require 'minitest/byebug' if ENV['DEBUG']
1
2
  require 'minitest/autorun'
2
3
  require 'gared'
3
4
 
4
5
  class GaredTest < Minitest::Test
5
6
 
7
+ def test_nli_api_query_publicatios_by_person
8
+ if ENV['NLI_API_KEY'].nil?
9
+ puts "skipping NLI API test because NLI_API_KEY envvar is not set"
10
+ return
11
+ end
12
+ puts "Testing NLI API"
13
+ nli = Gared::Nli_Api.new('https://api.nli.org.il/openlibrary/search', ENV['NLI_API_KEY'])
14
+ refute_nil nli
15
+ recs = nli.query_publications_by_person('ביאליק')
16
+ #recs = nli.query_publications_by_person('אילנאה')
17
+ refute_nil recs
18
+ refute_empty(recs)
19
+ refute_empty(recs[0].title)
20
+ end
21
+
6
22
  def test_primo_query_publicatios_by_person
7
23
  puts "Testing Primo"
8
24
  primo = Gared::Primo.new('http://primo.nli.org.il/PrimoWebServices/xservice/search/brief', 'NNL')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.0.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-10 00:00:00.000000000 Z
11
+ date: 2021-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zoom
@@ -56,14 +56,14 @@ dependencies:
56
56
  name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">"
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.8'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.8'
69
69
  - !ruby/object:Gem::Dependency
@@ -93,6 +93,7 @@ files:
93
93
  - lib/gared/holding.rb
94
94
  - lib/gared/idea.rb
95
95
  - lib/gared/jpress.rb
96
+ - lib/gared/nli_api.rb
96
97
  - lib/gared/person.rb
97
98
  - lib/gared/primo.rb
98
99
  - lib/gared/publication.rb
@@ -116,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
117
  - !ruby/object:Gem::Version
117
118
  version: '0'
118
119
  requirements: []
119
- rubyforge_project:
120
- rubygems_version: 2.7.7
120
+ rubygems_version: 3.1.4
121
121
  signing_key:
122
122
  specification_version: 4
123
123
  summary: Scrape Hebrew bibliography sources