krikri 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/krikri/records_controller.rb +14 -0
  3. data/app/models/krikri/search_index_document.rb +10 -0
  4. data/lib/krikri/engine.rb +1 -0
  5. data/lib/krikri/enricher.rb +156 -0
  6. data/lib/krikri/enrichment.rb +0 -2
  7. data/lib/krikri/enrichments/{deduplication.rb~ → dedup_values.rb~} +1 -4
  8. data/lib/krikri/enrichments/genre_filter.rb +45 -0
  9. data/lib/krikri/enrichments/strip_ending_punctuation.rb +21 -0
  10. data/lib/krikri/enrichments/strip_ending_punctuation.rb~ +18 -0
  11. data/lib/krikri/enrichments/strip_leading_colons.rb +15 -0
  12. data/lib/krikri/enrichments/strip_leading_colons.rb~ +15 -0
  13. data/lib/krikri/enrichments/strip_leading_punctuation.rb +18 -0
  14. data/lib/krikri/enrichments/strip_leading_punctuation.rb~ +18 -0
  15. data/lib/krikri/enrichments/timespan_split.rb +43 -1
  16. data/lib/krikri/harvesters/api_harvester.rb +164 -0
  17. data/lib/krikri/harvesters/api_harvester.rb~ +44 -0
  18. data/lib/krikri/harvesters/mdl_api_harvester.rb~ +7 -0
  19. data/lib/krikri/harvesters/mdl_harvester.rb~ +4 -0
  20. data/lib/krikri/harvesters/oai_harvester.rb +8 -0
  21. data/lib/krikri/ldp/rdf_source.rb +30 -0
  22. data/lib/krikri/mapper.rb +1 -3
  23. data/lib/krikri/provenance_query_client.rb +11 -1
  24. data/lib/krikri/util/extended_date_parser.rb +46 -11
  25. data/lib/krikri/version.rb +1 -1
  26. data/spec/internal/Gemfile.lock +12 -12
  27. data/spec/internal/config/initializers/blacklight_initializer.rb +1 -1
  28. data/spec/internal/config/initializers/devise.rb +2 -2
  29. data/spec/internal/config/secrets.yml +2 -2
  30. data/spec/internal/db/development.sqlite3 +0 -0
  31. data/spec/internal/db/migrate/{20150310190235_devise_create_users.rb → 20150320153132_devise_create_users.rb} +0 -0
  32. data/spec/internal/db/migrate/{20150310190253_create_searches.blacklight.rb → 20150320153151_create_searches.blacklight.rb} +0 -0
  33. data/spec/internal/db/migrate/{20150310190254_create_bookmarks.blacklight.rb → 20150320153152_create_bookmarks.blacklight.rb} +0 -0
  34. data/spec/internal/db/migrate/{20150310190255_add_polymorphic_type_to_bookmarks.blacklight.rb → 20150320153153_add_polymorphic_type_to_bookmarks.blacklight.rb} +0 -0
  35. data/spec/internal/db/schema.rb +1 -1
  36. data/spec/internal/db/test.sqlite3 +0 -0
  37. data/spec/internal/log/development.log +75 -73
  38. data/spec/internal/log/test.log +85076 -0
  39. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_alerts.scssc +0 -0
  40. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_background-variant.scssc +0 -0
  41. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_border-radius.scssc +0 -0
  42. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_buttons.scssc +0 -0
  43. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_center-block.scssc +0 -0
  44. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_clearfix.scssc +0 -0
  45. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_forms.scssc +0 -0
  46. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_gradients.scssc +0 -0
  47. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid-framework.scssc +0 -0
  48. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid.scssc +0 -0
  49. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_hide-text.scssc +0 -0
  50. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_image.scssc +0 -0
  51. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_labels.scssc +0 -0
  52. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_list-group.scssc +0 -0
  53. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-divider.scssc +0 -0
  54. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-vertical-align.scssc +0 -0
  55. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_opacity.scssc +0 -0
  56. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_pagination.scssc +0 -0
  57. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_panels.scssc +0 -0
  58. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_progress-bar.scssc +0 -0
  59. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_reset-filter.scssc +0 -0
  60. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_resize.scssc +0 -0
  61. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_responsive-visibility.scssc +0 -0
  62. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_size.scssc +0 -0
  63. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_tab-focus.scssc +0 -0
  64. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_table-row.scssc +0 -0
  65. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-emphasis.scssc +0 -0
  66. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-overflow.scssc +0 -0
  67. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_vendor-prefixes.scssc +0 -0
  68. data/spec/internal/tmp/cache/assets/test/sass/93e201cf4a11978a1f491a057a3bd569c3825210/blacklight.css.scssc +0 -0
  69. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_alerts.scssc +0 -0
  70. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_badges.scssc +0 -0
  71. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_breadcrumbs.scssc +0 -0
  72. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_button-groups.scssc +0 -0
  73. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_buttons.scssc +0 -0
  74. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_carousel.scssc +0 -0
  75. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_close.scssc +0 -0
  76. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_code.scssc +0 -0
  77. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_component-animations.scssc +0 -0
  78. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_dropdowns.scssc +0 -0
  79. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_forms.scssc +0 -0
  80. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_glyphicons.scssc +0 -0
  81. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_grid.scssc +0 -0
  82. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_input-groups.scssc +0 -0
  83. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_jumbotron.scssc +0 -0
  84. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_labels.scssc +0 -0
  85. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_list-group.scssc +0 -0
  86. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_media.scssc +0 -0
  87. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_mixins.scssc +0 -0
  88. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_modals.scssc +0 -0
  89. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navbar.scssc +0 -0
  90. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navs.scssc +0 -0
  91. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_normalize.scssc +0 -0
  92. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pager.scssc +0 -0
  93. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pagination.scssc +0 -0
  94. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_panels.scssc +0 -0
  95. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_popovers.scssc +0 -0
  96. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_print.scssc +0 -0
  97. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_progress-bars.scssc +0 -0
  98. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-embed.scssc +0 -0
  99. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-utilities.scssc +0 -0
  100. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_scaffolding.scssc +0 -0
  101. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tables.scssc +0 -0
  102. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_thumbnails.scssc +0 -0
  103. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tooltip.scssc +0 -0
  104. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_type.scssc +0 -0
  105. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_utilities.scssc +0 -0
  106. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_variables.scssc +0 -0
  107. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_wells.scssc +0 -0
  108. data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap-sprockets.scssc +0 -0
  109. data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap.scssc +0 -0
  110. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_blacklight_base.scssc +0 -0
  111. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_bookmark.scssc +0 -0
  112. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_catalog.scssc +0 -0
  113. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_facets.scssc +0 -0
  114. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_group.scssc +0 -0
  115. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_header.scssc +0 -0
  116. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_layout.scssc +0 -0
  117. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_modal.scssc +0 -0
  118. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_search_history.scssc +0 -0
  119. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight.scssc +0 -0
  120. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight_defaults.scssc +0 -0
  121. data/spec/internal/tmp/cache/assets/test/sprockets/059eceaa18e2eaee3d5168f3949f4957 +0 -0
  122. data/spec/internal/tmp/cache/assets/test/sprockets/093d898bf3f20b26b13c82714a77c040 +0 -0
  123. data/spec/internal/tmp/cache/assets/test/sprockets/10517c9579f8d23c82fb8caa66dca6a7 +0 -0
  124. data/spec/internal/tmp/cache/assets/test/sprockets/13fe41fee1fe35b49d145bcc06610705 +0 -0
  125. data/spec/internal/tmp/cache/assets/test/sprockets/19f4ba6fa86d2609c171da9f5ee3b11a +0 -0
  126. data/spec/internal/tmp/cache/assets/test/sprockets/2bec18569db80effd80a2a19a038100b +0 -0
  127. data/spec/internal/tmp/cache/assets/test/sprockets/2f5173deea6c795b8fdde723bb4b63af +0 -0
  128. data/spec/internal/tmp/cache/assets/test/sprockets/30ca7caa200fa8eaddaef443913ad9ad +0 -0
  129. data/spec/internal/tmp/cache/assets/test/sprockets/350a65d446eb7398f96d102f7a1a6d69 +0 -0
  130. data/spec/internal/tmp/cache/assets/test/sprockets/357970feca3ac29060c1e3861e2c0953 +0 -0
  131. data/spec/internal/tmp/cache/assets/test/sprockets/3cc20a63495e66e9251101ccfb0d389c +0 -0
  132. data/spec/internal/tmp/cache/assets/test/sprockets/4052820c15af72ba690230a0f92bd75e +0 -0
  133. data/spec/internal/tmp/cache/assets/test/sprockets/41b62fb10530766b5b9fef3338f5f30b +0 -0
  134. data/spec/internal/tmp/cache/assets/test/sprockets/421c89c58d3e76046a0294dae1f731ff +0 -0
  135. data/spec/internal/tmp/cache/assets/test/sprockets/43da966542fafc2a4c768be78527391c +0 -0
  136. data/spec/internal/tmp/cache/assets/test/sprockets/496a0d7dce1ff6bf4a9c3a089ea3a635 +0 -0
  137. data/spec/internal/tmp/cache/assets/test/sprockets/4e93570db72bb61820711448b78d548d +0 -0
  138. data/spec/internal/tmp/cache/assets/test/sprockets/50b9db0b908b421a9b941a445dbaeacc +0 -0
  139. data/spec/internal/tmp/cache/assets/test/sprockets/5155772cce704d83a64aa83eeda20c5a +0 -0
  140. data/spec/internal/tmp/cache/assets/test/sprockets/528c628cf107f8be6dd122e1154344be +0 -0
  141. data/spec/internal/tmp/cache/assets/test/sprockets/5ed02c7072bb024fa0832a1ca4113227 +0 -0
  142. data/spec/internal/tmp/cache/assets/test/sprockets/60141dc3d16c502963c3d798a099862d +0 -0
  143. data/spec/internal/tmp/cache/assets/test/sprockets/6df5b5c0a8e6d87ced9504a460df875e +0 -0
  144. data/spec/internal/tmp/cache/assets/test/sprockets/6eefd2765a716d66ff857cfcda69a119 +0 -0
  145. data/spec/internal/tmp/cache/assets/test/sprockets/813e23719129cf9f19349f720bebdf70 +0 -0
  146. data/spec/internal/tmp/cache/assets/test/sprockets/88ded10f28177bec1bddb0d72d6b1cd2 +0 -0
  147. data/spec/internal/tmp/cache/assets/test/sprockets/8ad5c3dc0f5fcc5d7a775d6294dda9f5 +0 -0
  148. data/spec/internal/tmp/cache/assets/test/sprockets/8edfca9082e02111be92e79000667f22 +0 -0
  149. data/spec/internal/tmp/cache/assets/test/sprockets/90b54a819800edfa41b67722d1561040 +0 -0
  150. data/spec/internal/tmp/cache/assets/test/sprockets/93aaa9231a32901266b05632f3d35ecd +0 -0
  151. data/spec/internal/tmp/cache/assets/test/sprockets/9c653367feff82588eb6041d783a5809 +0 -0
  152. data/spec/internal/tmp/cache/assets/test/sprockets/9ce190f082dc4fb74bddb818cc011ce6 +0 -0
  153. data/spec/internal/tmp/cache/assets/test/sprockets/9f68eb44ffcb9dd95623c179300414fd +0 -0
  154. data/spec/internal/tmp/cache/assets/test/sprockets/ab2a05b91d7d316d4ceb47ce067006fb +0 -0
  155. data/spec/internal/tmp/cache/assets/test/sprockets/ad4a54c43c2a4c3874bfde1c9f08c248 +0 -0
  156. data/spec/internal/tmp/cache/assets/test/sprockets/b1a42de74c934edd0e5eed0f98a23597 +0 -0
  157. data/spec/internal/tmp/cache/assets/test/sprockets/b1c03c36188b4f58f819510adb9a7e9c +0 -0
  158. data/spec/internal/tmp/cache/assets/test/sprockets/b225d67626669cb154cbe08a3c439d48 +0 -0
  159. data/spec/internal/tmp/cache/assets/test/sprockets/b35e12934e9f05662777579549e31cd7 +0 -0
  160. data/spec/internal/tmp/cache/assets/test/sprockets/b3ae9e3cdd9991d7103b0a00e33b5778 +0 -0
  161. data/spec/internal/tmp/cache/assets/test/sprockets/b42c247628daaf44109584724682a6ad +0 -0
  162. data/spec/internal/tmp/cache/assets/test/sprockets/b660ec1ede271559cfd0259eed96ae9c +0 -0
  163. data/spec/internal/tmp/cache/assets/test/sprockets/ba078b29a7f067b3acdd538a286235a8 +0 -0
  164. data/spec/internal/tmp/cache/assets/test/sprockets/bb108ef3fc4c96d1c20cc41f97d943a0 +0 -0
  165. data/spec/internal/tmp/cache/assets/test/sprockets/bb92f4b8c1bedafe60b6f226b549138a +0 -0
  166. data/spec/internal/tmp/cache/assets/test/sprockets/c644ecad928e076f999e9c9a3bc350c9 +0 -0
  167. data/spec/internal/tmp/cache/assets/test/sprockets/c78dd20a0df27e9394a413bf2e4abf92 +0 -0
  168. data/spec/internal/tmp/cache/assets/test/sprockets/c9efce42580b0ad0374a052aa61f2a07 +0 -0
  169. data/spec/internal/tmp/cache/assets/test/sprockets/cffd775d018f68ce5dba1ee0d951a994 +0 -0
  170. data/spec/internal/tmp/cache/assets/test/sprockets/d1bce36d6e3ab792562b9c929f2ce897 +0 -0
  171. data/spec/internal/tmp/cache/assets/test/sprockets/d24a7c4b8ac71a90e809881a64970898 +0 -0
  172. data/spec/internal/tmp/cache/assets/test/sprockets/d771ace226fc8215a3572e0aa35bb0d6 +0 -0
  173. data/spec/internal/tmp/cache/assets/test/sprockets/db0d9534ecdebba33af5d1d81060e855 +0 -0
  174. data/spec/internal/tmp/cache/assets/test/sprockets/dbba4bbc32c17ade3d618c5d0baeb371 +0 -0
  175. data/spec/internal/tmp/cache/assets/test/sprockets/e8276b98892dcac8285a592bb63fe75b +0 -0
  176. data/spec/internal/tmp/cache/assets/test/sprockets/e9f7ccc553ce1a217709cc7a08cfb032 +0 -0
  177. data/spec/internal/tmp/cache/assets/test/sprockets/ecfc314951b349e27742c1b26880619e +0 -0
  178. data/spec/internal/tmp/cache/assets/test/sprockets/eddae0741d973a8ab3df27c1ea5b635e +0 -0
  179. data/spec/internal/tmp/cache/assets/test/sprockets/f274b5f22db177b6464b50691d531688 +0 -0
  180. data/spec/internal/tmp/cache/assets/test/sprockets/f3935581a84aba9a499005ed6a8e18be +0 -0
  181. data/spec/internal/tmp/cache/assets/test/sprockets/f3e7729c53b8a2c408086a61b58922ae +0 -0
  182. data/spec/internal/tmp/cache/assets/test/sprockets/f45dfdb34e84d3c72a3319399a7316a3 +0 -0
  183. data/spec/internal/tmp/cache/assets/test/sprockets/f74af03e26e893af40b07a6d0f970cd4 +0 -0
  184. data/spec/internal/tmp/cache/assets/test/sprockets/f7cbd26ba1d28d48de824f0e94586655 +0 -0
  185. data/spec/internal/tmp/cache/assets/test/sprockets/fa11f1beb15b0f04503ce1f06701a8bf +0 -0
  186. data/spec/internal/tmp/cache/assets/test/sprockets/faac2909046aa87e1f1f08b2bf0ad2b2 +0 -0
  187. data/spec/lib/krikri/enricher_spec.rb +130 -0
  188. data/spec/lib/krikri/enrichments/dedup_values_spec.rb~ +10 -0
  189. data/spec/lib/krikri/enrichments/genre_filter_spec.rb +21 -0
  190. data/spec/lib/krikri/enrichments/remove_empty_fields_spec.rb +1 -0
  191. data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb +28 -0
  192. data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb~ +16 -0
  193. data/spec/lib/krikri/enrichments/strip_html_spec.rb +1 -0
  194. data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb +16 -0
  195. data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb~ +16 -0
  196. data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb +16 -0
  197. data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb~ +16 -0
  198. data/spec/lib/krikri/enrichments/strip_punctuation_spec.rb +1 -0
  199. data/spec/lib/krikri/enrichments/strip_whitespace_spec.rb +1 -0
  200. data/spec/lib/krikri/enrichments/timespan_split_spec.rb +15 -0
  201. data/spec/lib/krikri/harvesters/api_harvester_spec.rb +98 -0
  202. data/spec/lib/krikri/harvesters/api_harvester_spec.rb~ +47 -0
  203. data/spec/lib/krikri/harvesters/mdl_api_harvester_spec.rb~ +11 -0
  204. data/spec/lib/krikri/harvesters/mdl_harvester_spec.rb~ +8 -0
  205. data/spec/lib/krikri/harvesters/oai_harvester_spec.rb +12 -6
  206. data/spec/lib/krikri/job_spec.rb +40 -0
  207. data/spec/lib/krikri/mapper_agent_spec.rb +10 -6
  208. data/spec/lib/krikri/util/extended_date_parser_spec.rb +14 -2
  209. data/spec/models/search_index_document_spec.rb +8 -0
  210. data/spec/spec_helper.rb +5 -0
  211. data/spec/support/shared_examples/harvester.rb +8 -1
  212. data/spec/support/shared_examples/rdf_source.rb +22 -0
  213. data/spec/support/shared_examples/string_enrichment.rb +13 -4
  214. metadata +365 -21
  215. data/lib/krikri/enrichments/capitalize.rb~ +0 -24
  216. data/lib/krikri/enrichments/genre_filter.rb~ +0 -9
  217. data/lib/krikri/enrichments/iso_enrcich.rb~ +0 -8
  218. data/lib/krikri/enrichments/iso_enrich.rb~ +0 -13
  219. data/lib/krikri/enrichments/parse_date.rb~ +0 -10
  220. data/lib/krikri/enrichments/strip_html.rb~ +0 -16
  221. data/lib/krikri/enrichments/timespan.rb~ +0 -6
  222. data/lib/krikri/enrichments/timespan_split.rb~ +0 -83
@@ -0,0 +1,164 @@
1
+ module Krikri::Harvesters
2
+ ##
3
+ # A harvester implementation for REST APIs. The default ApiHarvester expects
4
+ # Solr-like JSON responses/records.
5
+ #
6
+ # An internal interface is provided for easier subclassing. A new API
7
+ # harvester may reimplement:
8
+ # - #get_docs (to retrieve record docs from a response)
9
+ # - #get_count (to determine total record count from a response)
10
+ # - #get_identifier (to retrieve an identifier from a record document)
11
+ # - #get_content (to retrieve a content string from a record document)
12
+ # - #next_options (to generate the parameters for the next request)
13
+ #
14
+ # If the content type of the records is other than JSON, you will also want
15
+ # to override `#content_type`.
16
+ class ApiHarvester
17
+ include Krikri::Harvester
18
+
19
+ attr_reader :opts
20
+
21
+ ##
22
+ # @param opts [Hash] options for the harvester
23
+ # @see .expected_opts
24
+ def initialize(opts = {})
25
+ super
26
+ @opts = opts.fetch(:api, {})
27
+ end
28
+
29
+ ##
30
+ # @return [Hash] A hash documenting the allowable options to pass to
31
+ # initializers.
32
+ #
33
+ # @see Krikri::Harvester::expected_opts
34
+ def self.expected_opts
35
+ {
36
+ key: :api,
37
+ opts: {
38
+ params: { type: :string, required: false }
39
+ }
40
+ }
41
+ end
42
+
43
+ ##
44
+ # @see Krikri::Harvester#count
45
+ def count
46
+ get_count(request(opts))
47
+ end
48
+
49
+ ##
50
+ # @return [Enumerator::Lazy] an enumerator of the records targeted by this
51
+ # harvester.
52
+ def records
53
+ enumerate_records.lazy.map { |rec| build_record(rec) }
54
+ end
55
+
56
+ ##
57
+ # Enumerates the identifiers of the records targeted by this harvester
58
+ #
59
+ # @return [Enumerator::Lazy] an enumerator over the ids for the records
60
+ # targeted by this harvester.
61
+ def record_ids
62
+ enumerate_records.lazy.map { |r| get_identifier(r) }
63
+ end
64
+
65
+ ##
66
+ # @param identifier [#to_s] the identifier of the record to get
67
+ # @return [#to_s] the record
68
+ def get_record(identifier)
69
+ response = request(:params => { :q => "id:#{identifier.to_s}" })
70
+ build_record(get_docs(response).first)
71
+ end
72
+
73
+ ##
74
+ # @return [String] the content type for the records generated by this
75
+ # harvester
76
+ def content_type
77
+ 'application/json'
78
+ end
79
+
80
+ private
81
+
82
+ ##
83
+ # @param doc [#to_s] a raw record document with an identifier
84
+ #
85
+ # @return [String] the provider's identifier for the document
86
+ def get_identifier(doc)
87
+ doc['record_id']
88
+ end
89
+
90
+ ##
91
+ # @param response [#to_s] a response from the REST API
92
+ #
93
+ # @return [Integer] a count of the total records found by the request
94
+ def get_count(response)
95
+ response['response']['numFound']
96
+ end
97
+
98
+ ##
99
+ # @param response [#to_s] a response from the REST API
100
+ #
101
+ # @return [Array] an array of record documents from the response
102
+ def get_docs(response)
103
+ response['response']['docs']
104
+ end
105
+
106
+ ##
107
+ # @param doc [#to_s] a raw record document
108
+ #
109
+ # @return [String] the record content
110
+ def get_content(doc)
111
+ doc.to_json
112
+ end
113
+
114
+ ##
115
+ # Send a request via `RestClient`, and parse the result as JSON
116
+ def request(request_opts)
117
+ JSON.parse(RestClient.get(uri, request_opts))
118
+ end
119
+
120
+ ##
121
+ # Given a current set of options and a number of records from the last
122
+ # request, generate the options for the next request.
123
+ #
124
+ # @param opts [Hash] an options hash from the previous request
125
+ # @param record_count [#to_i]
126
+ #
127
+ # @return [Hash] the next request's options hash
128
+ def next_options(opts, record_count)
129
+ old_start = opts['params'].fetch('start', 0)
130
+ opts['params']['start'] = old_start.to_i + record_count
131
+ opts
132
+ end
133
+
134
+ ##
135
+ # @return [Enumerator] an enumerator over the records
136
+ def enumerate_records
137
+ Enumerator.new do |yielder|
138
+ request_opts = opts.deep_dup
139
+ loop do
140
+ break if request_opts.nil?
141
+ docs = get_docs(request(request_opts.dup))
142
+ break if docs.empty?
143
+
144
+ docs.each { |r| yielder << r }
145
+
146
+ request_opts = next_options(request_opts, docs.count)
147
+ end
148
+ end
149
+ end
150
+
151
+ ##
152
+ # Builds an instance of `@record_class` with the given doc's JSON as
153
+ # content.
154
+ #
155
+ # @param doc [#to_json] the content to serialize as JSON in `#content`
156
+ # @return [#to_s] an instance of @record_class with a minted id and
157
+ # content the given content
158
+ def build_record(doc)
159
+ @record_class.build(mint_id(get_identifier(doc)),
160
+ get_content(doc),
161
+ content_type)
162
+ end
163
+ end
164
+ end
@@ -0,0 +1,44 @@
1
+ module Krikri::Harvesters
2
+ ##
3
+ # A harvester implementation for REST APIs
4
+ class ApiHarvester
5
+ include Krikri::Harvester
6
+
7
+ def initialize(opts = {})
8
+ super
9
+ @opts = opts.fetch(:api, {})
10
+ end
11
+
12
+ ##
13
+ # @return [Hash] A hash documenting the allowable options to pass to
14
+ # initializers.
15
+ #
16
+ # @see Krikri::Harvester::expected_opts
17
+ def self.expected_opts
18
+ {
19
+ key: :api,
20
+ opts: {
21
+ params: { type: :string, required: false }
22
+ }
23
+ }
24
+ end
25
+
26
+ ##
27
+ # @see Krikri::Harvester#count
28
+ def count
29
+ JSON.parse(request(opts['params']))['response']['numFound']
30
+ end
31
+
32
+ ##
33
+ # @return [Enumerator::Lazy] an enumerator of the records targeted by this
34
+ # harvester.
35
+ def records
36
+ end
37
+
38
+ private
39
+
40
+ def request(opts)
41
+ RestClient.get uri, opts
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,7 @@
1
+ module Krikri::Harvesters
2
+ ##
3
+ # A harvester implementation for Minnesota Digital Library's API
4
+ class MdlApiHarvester
5
+
6
+ end
7
+ end
@@ -0,0 +1,4 @@
1
+ module Krikri::Harvesters
2
+ ##
3
+ # A harvester implementation for MDL's API
4
+ class MdlHarvester
@@ -156,6 +156,14 @@ module Krikri::Harvesters
156
156
  ##
157
157
  # Runs the request in the given block against the sets specified in `opts`.
158
158
  # Results are concatenated into a single enumerator
159
+ #
160
+ # @param opts [Hash] the options to pass, including all sets to process.
161
+ # @yield [set_opts] gives options to the block once for each set. The
162
+ # block should run the harvest action with the options and give an
163
+ # Enumerable.
164
+ #
165
+ # @return [Enumerator::Lazy] A lazy enumerator concatenating the results
166
+ # of the block with each set.
159
167
  def request_with_sets(opts, &block)
160
168
  sets = Array(opts.delete(:set))
161
169
  if opts[:skip_set]
@@ -2,6 +2,8 @@ module Krikri::LDP
2
2
  ##
3
3
  # Adds simple LDP persistence to ActiveTriples::Resource classes
4
4
  # @see ActiveTriples::Resource
5
+ #
6
+ # @see http://www.w3.org/TR/ldp/#ldprs
5
7
  module RdfSource
6
8
  extend ActiveSupport::Concern
7
9
  include Krikri::LDP::Resource
@@ -30,6 +32,34 @@ module Krikri::LDP
30
32
  result
31
33
  end
32
34
 
35
+ ##
36
+ # Adds an appropriate provenance statement with the given URI and saves
37
+ # the resource.
38
+ #
39
+ # This method treats RDFSources as stateful resources. This is in conflict
40
+ # with the PROV model, which assumes each revision is its own Resource. The
41
+ # internal predicate `dpla:wasRevisedBy` is used for non-generating
42
+ # revisions of stateful RDFSources.
43
+ #
44
+ # @todo Assuming a Marmotta LDP server, there are version URIs available
45
+ # (via Memento) which could be used for a direct PROV implementation.
46
+ # Consider options for doing that either alongside or in place of this
47
+ # approach.
48
+ #
49
+ # @param activity_uri [#to_term] the URI of the prov:Activity to mark as
50
+ # generating or revising the saved resource.
51
+ #
52
+ # @see #save
53
+ #
54
+ # @see http://www.w3.org/TR/prov-primer/
55
+ # @see http://www.w3.org/TR/2013/REC-prov-o-20130430/
56
+ def save_with_provenance(activity_uri)
57
+ predicate =
58
+ exists? ? RDF::DPLA.wasRevisedBy : RDF::PROV.wasGeneratedBy
59
+ self << RDF::Statement(self, predicate, activity_uri)
60
+ save
61
+ end
62
+
33
63
  private
34
64
 
35
65
  ##
data/lib/krikri/mapper.rb CHANGED
@@ -109,9 +109,7 @@ module Krikri
109
109
  Krikri::Mapper.map(name, records).each do |rec|
110
110
  begin
111
111
  rec.mint_id! if rec.node?
112
- rec << RDF::Statement(rec, RDF::PROV.wasGeneratedBy, activity_uri) if
113
- activity_uri
114
- rec.save
112
+ activity_uri ? rec.save_with_provenance(activity_uri) : rec.save
115
113
  rescue => e
116
114
  Rails.logger.error("Error saving record: #{rec.rdf_subject}\n" \
117
115
  "#{e.message}\n#{e.backtrace}")
@@ -6,11 +6,21 @@ module Krikri
6
6
 
7
7
  module_function
8
8
 
9
+ ##
10
+ # Finds all entities generated or revised by the activity whose URI is
11
+ # given.
12
+ #
13
+ # @param activity_uri [#to_uri] the URI of the activity to search
14
+ #
15
+ # @return [RDF::SPARQL::Query] a query object that, when executed, will
16
+ # give solutions containing the URIs for the resources in `#record`.
9
17
  def find_by_activity(activity_uri)
10
18
  raise ArgumentError, 'activity_uri must be an RDF::URI' unless
11
19
  activity_uri.respond_to? :to_uri
12
20
  SPARQL_CLIENT.select(:record)
13
- .where([:record, RDF::PROV.wasGeneratedBy, activity_uri])
21
+ .where([:record,
22
+ [RDF::PROV.wasGeneratedBy, '|', RDF::DPLA.wasRevisedBy],
23
+ activity_uri])
14
24
  end
15
25
  end
16
26
  end
@@ -1,4 +1,6 @@
1
1
  module Krikri::Util
2
+ ##
3
+ # Utilities to parse string values into EDTF dates or Intervals.
2
4
  module ExtendedDateParser
3
5
  module_function
4
6
 
@@ -20,18 +22,21 @@ module Krikri::Util
20
22
  # #range_match to look for range values.
21
23
  #
22
24
  # @return [Date, EDTF::Epoch, EDTF::Interval, nil] the date parsed or nil
25
+ # @see http://www.loc.gov/standards/datetime/
23
26
  def parse(date_str, allow_interval = false)
24
- date_str.strip!
25
- date_str.gsub!(/\s+/, ' ')
26
- date = parse_interval(date_str) if allow_interval
27
- date ||= parse_m_d_y(date_str)
28
- date ||= Date.edtf(date_str.gsub('.', '-'))
29
- date ||= partial_edtf(date_str)
30
- date ||= decade_hyphen(date_str)
31
- date ||= month_year(date_str)
32
- date ||= decade_s(date_str)
33
- date ||= hyphenated_partial_range(date_str)
34
- date ||= parse_date(date_str)
27
+ str = preprocess(date_str.dup)
28
+ date = parse_interval(str) if allow_interval
29
+ date ||= parse_m_d_y(str)
30
+ date ||= Date.edtf(str.gsub('.', '-'))
31
+ date ||= partial_edtf(str)
32
+ date ||= decade_hyphen(str)
33
+ date ||= month_year(str)
34
+ date ||= decade_s(str)
35
+ date ||= hyphenated_partial_range(str)
36
+ date ||= parse_date(str)
37
+ # Only do this if certain letters are present to avoid infinite loops.
38
+ date ||= circa(str) if str.match(/[circabout]/i)
39
+ date = date.first if date.is_a? EDTF::Set
35
40
  date || nil
36
41
  end
37
42
 
@@ -55,6 +60,36 @@ module Krikri::Util
55
60
  end
56
61
  end
57
62
 
63
+ ##
64
+ # Preprocess the date string to remove extra whitespace and convert ad hoc
65
+ # formatting to equivalent EDTF.
66
+ #
67
+ # @todo should '-' be interpreted as 'x' or '?'
68
+ # @see http://www.loc.gov/standards/datetime/pre-submission.html#maskedprecision
69
+ def preprocess(str)
70
+ str.gsub!(/late/i, '')
71
+ str.gsub!(/early/i, '')
72
+ str.strip!
73
+ str.gsub!(/\s+/, ' ')
74
+ str.gsub!('0s', 'x') if str.match(/^[1-9]+0s$/)
75
+ str.gsub!('-', 'x') if str.match(/^[1-9]+\-+$/)
76
+ str
77
+ end
78
+
79
+ ##
80
+ # Remove 'circa' or 'about' or variations and return an uncertain EDTF
81
+ # date.
82
+ #
83
+ # @param str [String]
84
+ # @return [Date, nil] an EDTF date, marked uncertain; or `nil`
85
+ # @see #parse
86
+ def circa(str)
87
+ run = str.gsub!(/.*c[irca\.]*/i, '')
88
+ run ||= str.gsub!(/.*about/i, '')
89
+ date = parse(str) if run
90
+ date.nil? ? nil : date.uncertain!
91
+ end
92
+
58
93
  ##
59
94
  # Creates an EDTF::Interval from a string
60
95
  #
@@ -1,3 +1,3 @@
1
1
  module Krikri
2
- VERSION = "0.3.3"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -1,11 +1,11 @@
1
1
  PATH
2
2
  remote: /home/tjohnson/src/dpla/krikri
3
3
  specs:
4
- krikri (0.3.2)
4
+ krikri (0.3.3)
5
5
  analysand (= 4.0.0)
6
6
  blacklight (~> 5.8.0)
7
7
  devise (~> 3.4.1)
8
- dpla-map (= 4.0.0.0.pre.9)
8
+ dpla-map (= 4.0.0.0.pre.10)
9
9
  edtf
10
10
  jsonpath
11
11
  oai
@@ -61,7 +61,7 @@ GEM
61
61
  rack
62
62
  yajl-ruby
63
63
  arel (5.0.1.20140414130214)
64
- autoprefixer-rails (5.1.7)
64
+ autoprefixer-rails (5.1.7.1)
65
65
  execjs
66
66
  json
67
67
  bcrypt (3.1.10)
@@ -72,7 +72,7 @@ GEM
72
72
  nokogiri (~> 1.6)
73
73
  rails (>= 3.2.6, < 5)
74
74
  rsolr (~> 1.0.6)
75
- bootstrap-sass (3.3.3)
75
+ bootstrap-sass (3.3.4.1)
76
76
  autoprefixer-rails (>= 5.0.0.1)
77
77
  sass (>= 3.2.19)
78
78
  builder (3.2.2)
@@ -100,10 +100,10 @@ GEM
100
100
  responders
101
101
  thread_safe (~> 0.1)
102
102
  warden (~> 1.2.3)
103
- dpla-map (4.0.0.0.pre.9)
104
- active-triples (~> 0.3)
105
- linked_vocabs (~> 0.1)
106
- ebnf (0.3.6)
103
+ dpla-map (4.0.0.0.pre.10)
104
+ active-triples (~> 0.6.0)
105
+ linked_vocabs (~> 0.2.0)
106
+ ebnf (0.3.7)
107
107
  haml (~> 4.0)
108
108
  rdf (~> 1.1)
109
109
  sxp (~> 0.1, >= 0.1.3)
@@ -130,7 +130,7 @@ GEM
130
130
  htmlentities (4.3.3)
131
131
  http_parser.rb (0.6.0)
132
132
  i18n (0.7.0)
133
- jbuilder (2.2.11)
133
+ jbuilder (2.2.12)
134
134
  activesupport (>= 3.0.0, < 5)
135
135
  multi_json (~> 1.2)
136
136
  jettywrapper (2.0.3)
@@ -153,8 +153,8 @@ GEM
153
153
  activesupport (>= 3.0.0)
154
154
  libv8 (3.16.14.7)
155
155
  link_header (0.0.8)
156
- linked_vocabs (0.1.0)
157
- active-triples (>= 0.1.0)
156
+ linked_vocabs (0.2.0)
157
+ active-triples (>= 0.6.0)
158
158
  rake
159
159
  rdf (>= 1.1.2.1)
160
160
  sparql
@@ -329,7 +329,7 @@ GEM
329
329
  libv8 (~> 3.16.14.0)
330
330
  ref
331
331
  thor (0.19.1)
332
- thread_safe (0.3.4)
332
+ thread_safe (0.3.5)
333
333
  tilt (1.4.1)
334
334
  timers (4.0.1)
335
335
  hitimes