berkeley_library-tind 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (162) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/misc.xml +4 -0
  6. data/.idea/modules.xml +8 -0
  7. data/.idea/tind.iml +138 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +58 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +73 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-tind.gemspec +50 -0
  21. data/bin/tind-export +14 -0
  22. data/docker-compose.yml +15 -0
  23. data/lib/berkeley_library/tind.rb +3 -0
  24. data/lib/berkeley_library/tind/api.rb +1 -0
  25. data/lib/berkeley_library/tind/api/api.rb +132 -0
  26. data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
  27. data/lib/berkeley_library/tind/api/collection.rb +82 -0
  28. data/lib/berkeley_library/tind/api/date_range.rb +67 -0
  29. data/lib/berkeley_library/tind/api/format.rb +32 -0
  30. data/lib/berkeley_library/tind/api/search.rb +100 -0
  31. data/lib/berkeley_library/tind/config.rb +103 -0
  32. data/lib/berkeley_library/tind/export.rb +1 -0
  33. data/lib/berkeley_library/tind/export/column.rb +54 -0
  34. data/lib/berkeley_library/tind/export/column_group.rb +144 -0
  35. data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
  36. data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
  37. data/lib/berkeley_library/tind/export/config.rb +154 -0
  38. data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
  39. data/lib/berkeley_library/tind/export/export.rb +47 -0
  40. data/lib/berkeley_library/tind/export/export_command.rb +168 -0
  41. data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
  42. data/lib/berkeley_library/tind/export/export_format.rb +67 -0
  43. data/lib/berkeley_library/tind/export/exporter.rb +105 -0
  44. data/lib/berkeley_library/tind/export/filter.rb +52 -0
  45. data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
  46. data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
  47. data/lib/berkeley_library/tind/export/row.rb +24 -0
  48. data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
  49. data/lib/berkeley_library/tind/export/table.rb +175 -0
  50. data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
  51. data/lib/berkeley_library/tind/marc.rb +1 -0
  52. data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
  53. data/lib/berkeley_library/tind/module_info.rb +14 -0
  54. data/lib/berkeley_library/util/arrays.rb +178 -0
  55. data/lib/berkeley_library/util/logging.rb +1 -0
  56. data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
  57. data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
  58. data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
  59. data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
  60. data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
  61. data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
  62. data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
  63. data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
  64. data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
  65. data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
  66. data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
  67. data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
  68. data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
  69. data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
  70. data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
  71. data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
  72. data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
  73. data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
  74. data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
  75. data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
  76. data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
  77. data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
  78. data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
  79. data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
  80. data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
  81. data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
  82. data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
  83. data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
  84. data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
  85. data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
  86. data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
  87. data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
  88. data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
  89. data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
  90. data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
  91. data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
  92. data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
  93. data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
  94. data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
  95. data/lib/berkeley_library/util/paths.rb +111 -0
  96. data/lib/berkeley_library/util/stringios.rb +30 -0
  97. data/lib/berkeley_library/util/strings.rb +42 -0
  98. data/lib/berkeley_library/util/sys_exits.rb +15 -0
  99. data/lib/berkeley_library/util/times.rb +22 -0
  100. data/lib/berkeley_library/util/uris.rb +44 -0
  101. data/lib/berkeley_library/util/uris/appender.rb +162 -0
  102. data/lib/berkeley_library/util/uris/requester.rb +62 -0
  103. data/lib/berkeley_library/util/uris/validator.rb +32 -0
  104. data/rakelib/bundle.rake +8 -0
  105. data/rakelib/coverage.rake +11 -0
  106. data/rakelib/gem.rake +54 -0
  107. data/rakelib/rubocop.rake +18 -0
  108. data/rakelib/spec.rake +2 -0
  109. data/spec/.rubocop.yml +40 -0
  110. data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
  111. data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
  112. data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
  113. data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
  114. data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
  115. data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
  116. data/spec/berkeley_library/tind/config_spec.rb +86 -0
  117. data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
  118. data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
  119. data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
  120. data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
  121. data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
  122. data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
  123. data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
  124. data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
  125. data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
  126. data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
  127. data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
  128. data/spec/berkeley_library/util/arrays_spec.rb +340 -0
  129. data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
  130. data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
  131. data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
  132. data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
  133. data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
  134. data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
  135. data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
  136. data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
  137. data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
  138. data/spec/berkeley_library/util/paths_spec.rb +90 -0
  139. data/spec/berkeley_library/util/stringios_spec.rb +34 -0
  140. data/spec/berkeley_library/util/strings_spec.rb +27 -0
  141. data/spec/berkeley_library/util/times_spec.rb +39 -0
  142. data/spec/berkeley_library/util/uris_spec.rb +118 -0
  143. data/spec/data/collection-names.txt +438 -0
  144. data/spec/data/collections.json +4827 -0
  145. data/spec/data/disjoint-records.xml +187 -0
  146. data/spec/data/record-184453.xml +58 -0
  147. data/spec/data/record-184458.xml +63 -0
  148. data/spec/data/record-187888.xml +78 -0
  149. data/spec/data/records-api-search-cjk-p1.xml +6381 -0
  150. data/spec/data/records-api-search-cjk-p2.xml +5 -0
  151. data/spec/data/records-api-search-p1.xml +4506 -0
  152. data/spec/data/records-api-search-p2.xml +4509 -0
  153. data/spec/data/records-api-search-p3.xml +4506 -0
  154. data/spec/data/records-api-search-p4.xml +4509 -0
  155. data/spec/data/records-api-search-p5.xml +4506 -0
  156. data/spec/data/records-api-search-p6.xml +2436 -0
  157. data/spec/data/records-api-search-p7.xml +5 -0
  158. data/spec/data/records-api-search.xml +234 -0
  159. data/spec/data/records-manual-search.xml +547 -0
  160. data/spec/spec_helper.rb +30 -0
  161. data/test/profile/table_from_records_profile.rb +46 -0
  162. metadata +585 -0
@@ -0,0 +1,111 @@
1
+ require 'berkeley_library/util/stringios'
2
+
3
module BerkeleyLibrary
  module Util
    # This module, modeled on the {https://golang.org/pkg/path/ Go `path` package},
    # provides utility routines for modifying paths separated by forward slashes,
    # such as URL paths. For system-dependent file paths, use
    # {https://ruby-doc.org/stdlib-2.7.0/libdoc/pathname/rdoc/Pathname.html `Pathname`}
    # instead.
    module Paths
      include BerkeleyLibrary::Util::StringIOs

      class << self
        include Paths
      end

      # Returns the shortest path name equivalent to `path` by purely lexical
      # processing by:
      #
      # 1. replacing runs of multiple `/` with a single `/`
      # 2. eliminating all `.` (current directory) elements
      # 3. eliminating all `<child>/..` in favor of directly
      #    referencing the parent directory
      # 4. replacing all `/..` at the beginning of the path
      #    with a single leading `/`
      #
      # The returned path ends in a slash only if it is the root `/`.
      # @see https://9p.io/sys/doc/lexnames.html Rob Pike, "Lexical File Names in Plan 9 or Getting Dot-Dot Right"
      #
      # @param path [String, nil] the path to clean
      # @return [String, nil] the cleaned path, or `nil` for a nil path.
      def clean(path)
        return unless path
        return '.' if ['', '.'].include?(path)

        StringIO.new.tap do |out|
          out << '/' if path[0] == '/'
          # `r` is the read index into `path`; `dotdot` is the output position
          # that `..` processing must not backtrack past. Both start just after
          # the leading slash, if there is one.
          dotdot = (r = out.size)
          r, dotdot = process_next(r, dotdot, path, out) while r < path.size
          # everything cancelled out: the result is the current directory
          out << '.' if out.pos == 0
        end.string
      end

      # Joins any number of path elements into a single path, separating
      # them with slashes, ignoring empty elements and passing the result
      # to {Paths#clean}.
      #
      # @param elements [Array<String>] the elements to join
      # @return [String] the joined path, or the empty string if there is
      #   nothing to join
      def join(*elements)
        elements = elements.reject { |e| [nil, ''].include?(e) }
        joined_raw = elements.join('/')
        return '' if joined_raw == ''

        clean(joined_raw)
      end

      private

      # Consumes the path element starting at index `r`.
      #
      # @param r [Integer] the read index into `path`
      # @param dotdot [Integer] the output position `..` may not backtrack past
      # @param path [String] the path being cleaned
      # @param out [StringIO] the output buffer
      # @return [Array(Integer, Integer)] the updated `r` and `dotdot`
      def process_next(r, dotdot, path, out)
        # empty path element, or .
        return r + 1, dotdot if empty_or_dot?(r, path)
        # .. element: remove to last /
        return handle_dotdot(r, dotdot, path, out) if dotdot?(r, path)

        # real path element
        [append_from(r, path, out), dotdot]
      end

      # Handles a `..` element at index `r`: drops the previous output element
      # if there is one to drop; otherwise, for a non-rooted path, emits the
      # `..` itself. For a rooted path a leading `..` is simply discarded.
      #
      # @return [Array(Integer, Integer)] the updated `r` and `dotdot`
      def handle_dotdot(r, dotdot, path, out)
        if out.pos > dotdot
          backtrack_to_dotdot(out, dotdot)
        elsif path[0] != '/'
          dotdot = append_dotdot(out)
        end

        [r + 2, dotdot]
      end

      # Returns true if the element starting at index `r` is exactly `..`.
      #
      # Both characters must be dots: without the check on `path[r + 1]`, any
      # two-character element starting with a dot (e.g. `.a`) would be treated
      # as a parent-directory reference.
      def dotdot?(r, path)
        path[r] == '.' && path[r + 1] == '.' && (r + 2 == path.size || path[r + 2] == '/')
      end

      # Returns true if index `r` is at a slash (an empty element) or at an
      # element that is exactly `.`.
      def empty_or_dot?(r, path)
        path[r] == '/' || (path[r] == '.' && (r + 1 == path.size || path[r + 1] == '/'))
      end

      # Copies the element starting at index `r` to `out`, preceded by a
      # slash unless it is the first thing after the root (or, for a
      # non-rooted path, the first thing written).
      #
      # @return [Integer] the index just past the copied element
      def append_from(r, path, out)
        out << '/' if (path[0] == '/' && out.pos != 1) || (path[0] != '/' && out.pos != 0)
        while r < path.size && path[r] != '/'
          out << path[r]
          r += 1
        end
        r
      end

      # Writes `..` to `out`, preceded by a slash if anything has already
      # been written.
      #
      # @return [Integer] the new output position, which becomes the
      #   backtracking limit
      def append_dotdot(out)
        out << '/' if out.pos > 1
        out << '..'
        out.pos
      end

      # Removes the last element from `out` by moving back to the preceding
      # slash (or to `dotdot`, whichever comes first) and truncating there.
      def backtrack_to_dotdot(out, dotdot)
        out.seek(-1, IO::SEEK_CUR)
        out.seek(-1, IO::SEEK_CUR) while out.pos > dotdot && getbyte(out, out.pos) != '/'.ord # '/' is ASCII 47
        out.truncate(out.pos)
      end

    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'stringio'
2
+
3
module BerkeleyLibrary
  module Util
    # Utility methods for working with `StringIO` instances.
    module StringIOs
      class << self
        include StringIOs
      end

      # Returns the byte (**not** character) at the specified byte index
      # in the specified `StringIO`. A negative index counts back from the
      # end of the buffer.
      #
      # The byte is read directly from the underlying string, so the
      # stream's current position is never moved, and the stream does not
      # need to be open for reading.
      #
      # @param s [StringIO] the StringIO to search in
      # @param i [Integer] the byte index
      # @return [Integer, nil] the byte, or nil if the byte index is invalid.
      def getbyte(s, i)
        # String#getbyte already returns nil for out-of-range indexes and
        # resolves negative indexes relative to the end of the string.
        s.string.getbyte(i)
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
module BerkeleyLibrary
  module Util
    # Utility methods for inspecting and comparing strings.
    module Strings

      ASCII_0 = '0'.ord
      ASCII_9 = '9'.ord

      # Returns true if every character of `s` is an ASCII digit (`0`-`9`).
      # Note that this is vacuously true for the empty string.
      #
      # @param s [String] the string to test
      # @return [Boolean] whether the string consists only of ASCII digits
      def ascii_numeric?(s)
        s.each_char.all? { |c| c.ord.between?(ASCII_0, ASCII_9) }
      end

      # Locates the point at which two strings differ
      #
      # @param s1 [String] the first string
      # @param s2 [String] the second string
      # @return [Integer, nil] the index of the first character in either string
      #   that differs from the other, or `nil` if the strings are identical,
      #   or are not strings
      def diff_index(s1, s2)
        return unless string_like?(s1, s2)

        shorter, longer = s1.size > s2.size ? [s2, s1] : [s1, s2]
        # Split the longer string once instead of indexing into it on every
        # iteration: character indexing into a multibyte string is not
        # constant-time, and `chars` (unlike `[]`) is covered by the
        # `string_like?` guard above.
        longer_chars = longer.chars
        shorter.chars.each_with_index do |c, i|
          return i if c != longer_chars[i]
        end
        shorter.size if shorter.size < longer.size # otherwise they're equal
      end

      class << self
        include Strings
      end

      private

      # Returns true if every argument responds to the methods
      # {#diff_index} relies on (`chars` and `size`).
      def string_like?(*strs)
        strs.all? { |s| s.respond_to?(:chars) && s.respond_to?(:size) }
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
+ module BerkeleyLibrary
2
+ module Util
3
+ # Process exit status codes, cf. BSD sysexits.h https://cgit.freebsd.org/src/tree/include/sysexits.h?h=releng/2.0
4
+ module SysExits
5
+ # successful termination
6
+ EX_OK = 0
7
+
8
+ # command line usage error
9
+ EX_USAGE = 64
10
+
11
+ # internal software error
12
+ EX_SOFTWARE = 70
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,22 @@
1
+ require 'time'
2
+
3
module BerkeleyLibrary
  module Util
    # Utility methods for normalizing time-like values.
    module Times
      class << self
        include Times
      end

      # Converts a time-like value to UTC.
      #
      # A value that already reports itself as UTC is returned unchanged;
      # anything else is converted with `getutc`, going through `to_time`
      # first when the value has no `getutc` of its own.
      #
      # @param time [Time, Date] the time
      # @return [Time, nil] the UTC time corresponding to `time`, or `nil`
      #   if `time` is nil
      # @raise [ArgumentError] if `time` cannot be converted
      def ensure_utc(time)
        return unless time

        already_utc = time.respond_to?(:utc?) && time.utc?
        if already_utc
          time
        elsif time.respond_to?(:getutc)
          time.getutc
        elsif time.respond_to?(:to_time)
          time.to_time.getutc
        else
          raise ArgumentError, "Not a date or time: #{time.inspect}"
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'berkeley_library/util/uris/appender'
2
+ require 'berkeley_library/util/uris/requester'
3
+ require 'berkeley_library/util/uris/validator'
4
+
5
+ module BerkeleyLibrary
6
+ module Util
7
+ module URIs
8
+ class << self
9
+ include URIs
10
+ end
11
+
12
+ # Appends the specified paths to the path of the specified URI, removing any extraneous slashes
13
+ # and merging additional query parameters, and returns a new URI with that path and the same scheme,
14
+ # host, query, fragment, etc. as the original. Delegates to {Appender}.
15
+ #
16
+ # @param uri [URI, String] the original URI
17
+ # @param elements [Array<String, Symbol>] the URI elements to join.
18
+ # @return [URI] a new URI appending the joined path elements.
19
+ # @raise [URI::InvalidComponentError] if appending the specified elements would create an invalid URI
20
+ def append(uri, *elements)
21
+ Appender.new(uri, *elements).to_uri
22
+ end
23
+
24
+ # Performs a GET request by delegating to {Requester.get}.
25
+ #
26
+ # @param uri [URI, String] the URI to GET
27
+ # @param params [Hash] the query parameters to add to the URI. (Note that the URI may already include query parameters.)
28
+ # @param headers [Hash] the request headers.
29
+ # @return [String] the body as a string.
30
+ # @raise [RestClient::Exception] in the event of an error.
31
+ def get(uri, params = {}, headers = {})
32
+ Requester.get(uri, params, headers)
33
+ end
34
+
35
+ # Returns the specified URL as a URI by delegating to {Validator.uri_or_nil}.
36
+ # @param url [String, URI] the URL.
37
+ # @return [URI, nil] the URI; presumably `nil` when `url` is nil, as the name suggests (confirm against Validator).
38
+ # @raise [URI::InvalidURIError] if `url` cannot be parsed as a URI.
39
+ def uri_or_nil(url)
40
+ Validator.uri_or_nil(url)
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,162 @@
1
+ require 'berkeley_library/util/paths'
2
+ require 'uri'
3
+ require 'typesafe_enum'
4
+
5
module BerkeleyLibrary
  module Util
    module URIs

      # Appends the specified paths to the path of the specified URI, removing any extraneous slashes,
      # and builds a new URI with that path and the same scheme, host, query, fragment, etc.
      # as the original.
      #
      # Elements are consumed left to right. Elements are path segments until one
      # containing `?` starts the query; an element containing `#` starts the
      # fragment. Once the query or fragment has started, subsequent plain
      # elements are appended to it rather than to the path.
      class Appender
        attr_reader :original_uri, :elements

        # Creates and invokes a new {Appender}.
        #
        # @param uri [URI, String] the original URI
        # @param elements [Array<String, Symbol>] the URI elements to join.
        # @raise [ArgumentError] if `uri` is nil
        # @raise [URI::InvalidComponentError] if appending the specified elements would create an invalid URI
        def initialize(uri, *elements)
          raise ArgumentError, 'uri cannot be nil' unless (@original_uri = URIs.uri_or_nil(uri))

          @elements = elements.map(&:to_s)
          # NOTE: #push_fragment_start may rewrite (or append) the element
          # following the current one, so iterate over the live array.
          @elements.each_with_index do |element, elem_index|
            next start_query_at(elem_index) if element.include?('?')
            next start_fragment_at(elem_index) if element.include?('#')

            add_element(element)
          end
        end

        # Returns the new URI.
        #
        # @return [URI] a new URI appending the joined path elements.
        # @raise [URI::InvalidComponentError] if appending the specified elements would create an invalid URI
        def to_uri
          original_uri.dup.tap do |new_uri|
            new_uri.path = Paths.join(original_uri.path, *path_elements)
            new_uri.query = query unless query_elements.empty?
            new_uri.fragment = fragment unless fragment_elements.empty?
          end
        end

        private

        # The component currently being built: `:path` (initially),
        # `:query`, or `:fragment`.
        def state
          @state ||= :path
        end

        def in_query?
          state == :query
        end

        def in_fragment?
          state == :fragment
        end

        # The query string, formed by concatenating the query elements
        # with no separator.
        def query
          query_elements.join
        end

        # The fragment, formed by concatenating the fragment elements
        # with no separator.
        def fragment
          fragment_elements.join
        end

        def path_elements
          @path_elements ||= []
        end

        # Query elements, seeded with the original URI's query if it has one.
        def query_elements
          @query_elements ||= [].tap { |e| e << original_uri.query if original_uri.query }
        end

        # Fragment elements, seeded with the original URI's fragment if it has one.
        def fragment_elements
          @fragment_elements ||= [].tap { |e| e << original_uri.fragment if original_uri.fragment }
        end

        # Starts the query at the element with the specified index, which
        # must contain a `?`.
        #
        # Every element containing a `?` is routed here, so this is the only
        # place a `?` following a `#` (either in an earlier element or
        # within this one) can be detected.
        def start_query_at(elem_index)
          query_follows_fragment = in_fragment? || query_after_fragment?(elem_index)
          raise URI::InvalidComponentError, err_query_after_fragment(elem_index) if query_follows_fragment
          raise URI::InvalidComponentError, err_too_many_queries(elem_index) unless query_elements.empty?

          handle_query_start(elem_index)
          @state = :query
        end

        # Starts the fragment at the element with the specified index, which
        # contains a `#` but (since `?` elements are handled first) no `?`.
        def start_fragment_at(elem_index)
          raise URI::InvalidComponentError, err_too_many_fragments(elem_index) unless fragment_elements.empty?

          handle_fragment_start(elem_index)
          @state = :fragment
        end

        # Returns a truthy value if the element at the specified index
        # contains a `#` with a `?` somewhere after it.
        def query_after_fragment?(elem_index)
          e = elements[elem_index]
          (f_index = e.index('#')) && e.index('?', f_index)
        end

        # Adds a plain element (one with neither `?` nor `#`) to whichever
        # component is currently being built. While still in the path, an
        # element containing `&` is treated as a continuation of an
        # existing query.
        def add_element(e)
          return fragment_elements << e if in_fragment?
          return query_elements << e if in_query? || (e.include?('&') && !query_elements.empty?)

          path_elements << e
        end

        def handle_query_start(elem_index)
          element = elements[elem_index]

          # if there's anything before the '?', we treat that excess as a path element
          excess, q_start = split_around(element, element.index('?'))
          q_start = push_fragment_start(elem_index, q_start)

          query_elements << q_start
          path_elements << excess
        end

        # if the fragment starts in the middle of this element, we keep the part before
        # the fragment delimiter '#', and push the rest (w/'#') back onto the next element
        # to be parsed in the next iteration
        def push_fragment_start(elem_index, q_start)
          return q_start unless (f_index = q_start.index('#'))

          next_index = elem_index + 1
          q_start, q_next = split_around(q_start, f_index) # NOTE: this doesn't return the '#'
          elements[next_index] = "##{q_next}#{elements[next_index]}" # so we prepend one here
          q_start
        end

        def handle_fragment_start(elem_index)
          element = elements[elem_index]

          # if there's anything before the '#', we treat that excess as a path element,
          # or as a query element if there's a query
          excess, f_start = split_around(element, element.index('#'))

          fragment_elements << f_start
          if in_query?
            query_elements << excess
          else
            path_elements << excess
          end
        end

        # Splits `s` into the parts before and after index `i`, dropping the
        # character at `i` itself.
        def split_around(s, i)
          [s[0...i], s[(i + 1)..]]
        end

        def err_too_many_queries(elem_index)
          "#{elements[elem_index].inspect}: URI already has a query string: #{query.inspect}"
        end

        def err_query_after_fragment(elem_index)
          "#{elements[elem_index].inspect}: Query delimiter '?' cannot follow fragment delimeter '#'"
        end

        def err_too_many_fragments(elem_index)
          "#{elements[elem_index].inspect}: URI already has a fragment: #{fragment.inspect}"
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'rest-client'
2
+ require 'berkeley_library/util/uris/appender'
3
+ require 'berkeley_library/util/uris/validator'
4
+ require 'berkeley_library/logging'
5
+
6
module BerkeleyLibrary
  module Util
    module URIs
      # Performs HTTP GET requests via RestClient and logs the status of
      # each response received.
      module Requester
        class << self
          include BerkeleyLibrary::Logging

          # Performs a GET request.
          #
          # @param uri [URI, String] the URI to GET
          # @param params [Hash] the query parameters to add to the URI. (Note that the URI may already include query parameters.)
          # @param headers [Hash] the request headers.
          # @return [String] the body as a string.
          # @raise [ArgumentError] if `uri` is nil
          # @raise [RestClient::Exception] in the event of an error.
          def get(uri, params = {}, headers = {})
            url_str = url_str_with_params(uri, params)
            resp = get_or_raise(url_str, headers)
            resp.body
          end

          private

          # Returns the URL as a string, with the form-encoded `params`
          # added to its query.
          #
          # The encoded parameters are passed to {Appender} as a single
          # element that carries its own delimiter: `?` to start a query,
          # or `&` to extend one the URL already has. (Appender concatenates
          # query elements without a separator, and only recognizes a
          # continuation of an existing query by the presence of `&`.)
          # Nothing is appended when there are no parameters, so the URL
          # does not gain a dangling `?`.
          def url_str_with_params(uri, params)
            raise ArgumentError, 'uri cannot be nil' unless (url_str = Validator.url_str_or_nil(uri))

            elements = [url_str]
            encoded_params = URI.encode_www_form(params)
            unless encoded_params.empty?
              delimiter = url_str.include?('?') ? '&' : '?'
              elements << "#{delimiter}#{encoded_params}"
            end

            uri = Appender.new(*elements).to_uri
            uri.to_s
          end

          # GETs the URL and returns the response if its status is 200;
          # any other status returned by RestClient is raised as a
          # `RestClient::RequestFailed`. The status of every response
          # received is logged.
          #
          # NOTE(review): an exception raised by `RestClient.get` itself
          # propagates before the logging `ensure` is entered, so it is
          # not logged here.
          def get_or_raise(url_str, headers)
            resp = RestClient.get(url_str, headers)
            begin
              return resp if (status = resp.code) == 200

              raise(exception_for(resp, status))
            ensure
              logger.info("GET #{url_str} returned #{status}")
            end
          end

          # Builds a `RestClient::RequestFailed` for the response, with a
          # message of the form "<status> <status text>".
          def exception_for(resp, status)
            RestClient::RequestFailed.new(resp, status).tap do |ex|
              status_message = RestClient::STATUSES[status] || '(Unknown)'
              ex.message = "#{status} #{status_message}"
            end
          end
        end
      end
    end
  end
end