gared 0.0.29 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/gared/primo.rb +35 -33
  3. data/test/test_gared.rb +3 -3
  4. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a4f6f4cbc307c097bf84e4308d7cc76257583ba58f97d0c861ebd79e1e362e6
4
- data.tar.gz: 83aca016e843de3bbdbdfbeecdf1b4b544a376d3205970a810c5076ce3d3466e
3
+ metadata.gz: ae6b9daf5b73b6ff01acc3f098ad45ae0dccac12ea146f6966d58e25c3d8a74e
4
+ data.tar.gz: 19396d2e5e6701a22dd7d27e49491a9bd816a60d48278ca5562843c305f88282
5
5
  SHA512:
6
- metadata.gz: 686d91870739b68c421f68c3a55fd8443dec0f4747118137d0cd256758e67e223c9db5617c7cbd408a4295c32936741172f8865dd96c1303a318841ff4157a41
7
- data.tar.gz: 85336cda8af09b48fa3515d90efb5898dbe28592282c17ce3f352cde96ba459aa50a9cab7c2b32c6d32df4afe52a547d7a999a02a19eb4c4edf28e5beff729fd
6
+ metadata.gz: c37a03e8f88f7c9b7d63761ced83736bfbedf55e3e94aee1c0fa9d53855d6d65a899a0e171d142ae46031dfd177fa3214e2cf8a4eb844f1f82db13b355ada1df
7
+ data.tar.gz: f4dd892a74e7fd1282e200f30ea1ab76bd87ebcd30629ec64a6c8c47a87d2d4eeef67481dd35454fe7f967a46b7a6da23bc3274ee7bf02c3fb87903a99720945
data/lib/gared/primo.rb CHANGED
@@ -3,8 +3,8 @@ require 'rest-client'
3
3
 
4
4
  module Gared
5
5
  class Primo
6
- def initialize(url, institution)
7
- @options = {url: url, institution: institution}
6
+ def initialize(url, vid, scope, apikey)
7
+ @options = {url: url, vid: vid, scope: scope, apikey: apikey}
8
8
  end
9
9
  def uri_escape(s)
10
10
  p = URI::Parser.new
@@ -27,48 +27,50 @@ module Gared
27
27
  def query_publications_by_person(person, ctx = nil)
28
28
  ret = []
29
29
  begin
30
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=1&bulkSize=50&query=facet_rtype,exact,books&json=true"
31
- json = JSON.parse(RestClient.get(url))
32
- total = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['@TOTALHITS'].to_i
33
- start_at = 1
34
- recs = json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC'] # stash the records
30
+ base_url = "#{@options[:url]}?vid=#{@options[:vid]}&scope=#{@options[:scope]}&q=creator,contains,#{uri_escape(person)}&qInclude=facet_rtype,exact,books&apikey=#{@options[:apikey]}&limit=50"
31
+ json = JSON.parse(RestClient.get(base_url))
32
+ total = json['info']['total'].to_i
33
+ start_at = 0
34
+ recs = json['docs'] # stash the records
35
35
  while recs.length < total
36
36
  start_at += 50
37
- url = @options[:url]+"?institution=#{@options[:institution]}&query=creator,contains,#{uri_escape(person)}&indx=#{start_at}&bulkSize=50&query=facet_rtype,exact,books&json=true"
37
+ url = base_url+"&offset=#{start_at}"
38
38
  json = JSON.parse(RestClient.get(url))
39
- recs += json['SEGMENTS']['JAGROOT']['RESULT']['DOCSET']['DOC']
40
- sleep 1 # respect the server and avoid flood-blocking
39
+ recs += json['docs']
40
+ sleep 0.5 # respect the server and avoid flood-blocking
41
41
  end
42
42
  recs.each do |r|
43
43
  begin
44
- deets = r['PrimoNMBib']['record']['display']
44
+ deets = r['pnx']['display'] # the fields inside are now always an array!
45
45
  p = Publication.new(ctx)
46
- p.title = deets['title']
47
- p.author_line = deets['creator']
48
- p.language = deets['language']
49
- p.notes = "#{deets['format']}\n#{deets['subject']}"
50
- p.publisher_line = deets['publisher']
51
- p.pub_year = deets['creationdate']
52
- p.source_id = r['PrimoNMBib']['record']['control']['sourcerecordid']
53
- # collect additional URLS from record, for clients to be able to determine whether a scanned object exists
54
- additional_urls = []
55
- deets.keys.each do |key|
56
- if deets[key].class == Array
57
- deets[key].each do |kkey|
58
- additional_urls << kkey if kkey.class == String && kkey =~ /https?:[^\s]/
59
- end
60
- elsif deets[key].class == String
61
- additional_urls << deets[key] if deets[key] =~ /https?:[^\s]/
62
- end
63
- end
64
- p.additional_urls = additional_urls if additional_urls.length > 0
46
+ p.title = deets['title'].join('; ')
47
+ p.author_line = deets['creator'].join('; ')
48
+ p.author_line += deets['contributor'].join('; ') if deets['contributor']
49
+ p.language = deets['language'].join('; ')
50
+ p.notes = deets['format'].join('; ')+r['pnx']['search']['subject'].join('; ') # "#{deets['format']}\n#{deets['subject']}"
51
+ p.publisher_line = deets['publisher'].join('; ')
52
+ p.pub_year = deets['creationdate'].join('; ')
53
+ p.source_id = r['pnx']['control']['sourcerecordid'].join('; ')
54
+ ## collect additional URLS from record, for clients to be able to determine whether a scanned object exists
55
+ # commented out for now, as it needs to be rewritten for the new Primo output structure
56
+ #additional_urls = []
57
+ #deets.keys.each do |key|
58
+ # if deets[key].class == Array
59
+ # deets[key].each do |kkey|
60
+ # additional_urls << kkey if kkey.class == String && kkey =~ /https?:[^\s]/
61
+ # end
62
+ # elsif deets[key].class == String
63
+ # additional_urls << deets[key] if deets[key] =~ /https?:[^\s]/
64
+ # end
65
+ #end
66
+ #p.additional_urls = additional_urls if additional_urls.length > 0
65
67
  h = Holding.new
66
68
  h.source_id = p.source_id
67
- h.source_name = 'Primo:'+@options[:institution]
69
+ h.source_name = 'Primo:'+@options[:vid]
68
70
 
69
71
  begin
70
- h.location = r['LIBRARIES']['LIBRARY'][0].nil? ? r['LIBRARIES']['LIBRARY']['callNumber'] : r['LIBRARIES']['LIBRARY'][0]['callNumber'] # there seem to be two cases, different between NLI and TAU, for example; there also seems to not even be a LIBRARIES key for some electronic resources. We're skipping those.
71
- rescue Exception
72
+ h.location = r['pnx']['search']['callnumber']
73
+ rescue Exception # We're skipping things without a callnumber, as they are not resources within the library itself.
72
74
  puts $!
73
75
  end
74
76
  p.add_holding(h)
data/test/test_gared.rb CHANGED
@@ -1,6 +1,6 @@
1
- require 'minitest/byebug' if ENV['DEBUG']
2
1
  require 'minitest/autorun'
3
2
  require 'gared'
3
+ require 'minitest/debugger' if ENV['DEBUG']
4
4
 
5
5
  class GaredTest < Minitest::Test
6
6
 
@@ -22,8 +22,8 @@ class GaredTest < Minitest::Test
22
22
 
23
23
  # temporarily disabled until we find another Primo server to test against
24
24
  def test_primo_query_publications_by_person
25
- puts "Testing Primo"
26
- primo = Gared::Primo.new('http://tau-primo.hosted.exlibrisgroup.com/PrimoWebServices/xservice/search/brief', 'TAU')
25
+ puts "Testing Primo (did you remember to put a key in PRIMO_API_KEY?)"
26
+ primo = Gared::Primo.new('https://api-eu.hosted.exlibrisgroup.com/primo/v1/search', 'HAU', 'books_and_more', ENV['PRIMO_API_KEY'])
27
27
  refute_nil primo
28
28
  recs = primo.query_publications_by_person('אילנאה')
29
29
  refute_nil recs
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gared
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.29
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-06 00:00:00.000000000 Z
11
+ date: 2024-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: watir
@@ -102,7 +102,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  requirements: []
105
- rubygems_version: 3.4.10
105
+ rubygems_version: 3.5.1
106
106
  signing_key:
107
107
  specification_version: 4
108
108
  summary: Scrape Hebrew bibliography sources