krikri 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/krikri/records_controller.rb +14 -0
  3. data/app/models/krikri/search_index_document.rb +10 -0
  4. data/lib/krikri/engine.rb +1 -0
  5. data/lib/krikri/enricher.rb +156 -0
  6. data/lib/krikri/enrichment.rb +0 -2
  7. data/lib/krikri/enrichments/{deduplication.rb~ → dedup_values.rb~} +1 -4
  8. data/lib/krikri/enrichments/genre_filter.rb +45 -0
  9. data/lib/krikri/enrichments/strip_ending_punctuation.rb +21 -0
  10. data/lib/krikri/enrichments/strip_ending_punctuation.rb~ +18 -0
  11. data/lib/krikri/enrichments/strip_leading_colons.rb +15 -0
  12. data/lib/krikri/enrichments/strip_leading_colons.rb~ +15 -0
  13. data/lib/krikri/enrichments/strip_leading_punctuation.rb +18 -0
  14. data/lib/krikri/enrichments/strip_leading_punctuation.rb~ +18 -0
  15. data/lib/krikri/enrichments/timespan_split.rb +43 -1
  16. data/lib/krikri/harvesters/api_harvester.rb +164 -0
  17. data/lib/krikri/harvesters/api_harvester.rb~ +44 -0
  18. data/lib/krikri/harvesters/mdl_api_harvester.rb~ +7 -0
  19. data/lib/krikri/harvesters/mdl_harvester.rb~ +4 -0
  20. data/lib/krikri/harvesters/oai_harvester.rb +8 -0
  21. data/lib/krikri/ldp/rdf_source.rb +30 -0
  22. data/lib/krikri/mapper.rb +1 -3
  23. data/lib/krikri/provenance_query_client.rb +11 -1
  24. data/lib/krikri/util/extended_date_parser.rb +46 -11
  25. data/lib/krikri/version.rb +1 -1
  26. data/spec/internal/Gemfile.lock +12 -12
  27. data/spec/internal/config/initializers/blacklight_initializer.rb +1 -1
  28. data/spec/internal/config/initializers/devise.rb +2 -2
  29. data/spec/internal/config/secrets.yml +2 -2
  30. data/spec/internal/db/development.sqlite3 +0 -0
  31. data/spec/internal/db/migrate/{20150310190235_devise_create_users.rb → 20150320153132_devise_create_users.rb} +0 -0
  32. data/spec/internal/db/migrate/{20150310190253_create_searches.blacklight.rb → 20150320153151_create_searches.blacklight.rb} +0 -0
  33. data/spec/internal/db/migrate/{20150310190254_create_bookmarks.blacklight.rb → 20150320153152_create_bookmarks.blacklight.rb} +0 -0
  34. data/spec/internal/db/migrate/{20150310190255_add_polymorphic_type_to_bookmarks.blacklight.rb → 20150320153153_add_polymorphic_type_to_bookmarks.blacklight.rb} +0 -0
  35. data/spec/internal/db/schema.rb +1 -1
  36. data/spec/internal/db/test.sqlite3 +0 -0
  37. data/spec/internal/log/development.log +75 -73
  38. data/spec/internal/log/test.log +85076 -0
  39. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_alerts.scssc +0 -0
  40. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_background-variant.scssc +0 -0
  41. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_border-radius.scssc +0 -0
  42. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_buttons.scssc +0 -0
  43. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_center-block.scssc +0 -0
  44. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_clearfix.scssc +0 -0
  45. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_forms.scssc +0 -0
  46. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_gradients.scssc +0 -0
  47. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid-framework.scssc +0 -0
  48. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid.scssc +0 -0
  49. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_hide-text.scssc +0 -0
  50. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_image.scssc +0 -0
  51. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_labels.scssc +0 -0
  52. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_list-group.scssc +0 -0
  53. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-divider.scssc +0 -0
  54. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-vertical-align.scssc +0 -0
  55. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_opacity.scssc +0 -0
  56. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_pagination.scssc +0 -0
  57. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_panels.scssc +0 -0
  58. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_progress-bar.scssc +0 -0
  59. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_reset-filter.scssc +0 -0
  60. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_resize.scssc +0 -0
  61. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_responsive-visibility.scssc +0 -0
  62. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_size.scssc +0 -0
  63. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_tab-focus.scssc +0 -0
  64. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_table-row.scssc +0 -0
  65. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-emphasis.scssc +0 -0
  66. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-overflow.scssc +0 -0
  67. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_vendor-prefixes.scssc +0 -0
  68. data/spec/internal/tmp/cache/assets/test/sass/93e201cf4a11978a1f491a057a3bd569c3825210/blacklight.css.scssc +0 -0
  69. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_alerts.scssc +0 -0
  70. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_badges.scssc +0 -0
  71. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_breadcrumbs.scssc +0 -0
  72. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_button-groups.scssc +0 -0
  73. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_buttons.scssc +0 -0
  74. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_carousel.scssc +0 -0
  75. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_close.scssc +0 -0
  76. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_code.scssc +0 -0
  77. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_component-animations.scssc +0 -0
  78. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_dropdowns.scssc +0 -0
  79. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_forms.scssc +0 -0
  80. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_glyphicons.scssc +0 -0
  81. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_grid.scssc +0 -0
  82. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_input-groups.scssc +0 -0
  83. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_jumbotron.scssc +0 -0
  84. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_labels.scssc +0 -0
  85. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_list-group.scssc +0 -0
  86. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_media.scssc +0 -0
  87. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_mixins.scssc +0 -0
  88. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_modals.scssc +0 -0
  89. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navbar.scssc +0 -0
  90. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navs.scssc +0 -0
  91. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_normalize.scssc +0 -0
  92. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pager.scssc +0 -0
  93. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pagination.scssc +0 -0
  94. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_panels.scssc +0 -0
  95. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_popovers.scssc +0 -0
  96. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_print.scssc +0 -0
  97. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_progress-bars.scssc +0 -0
  98. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-embed.scssc +0 -0
  99. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-utilities.scssc +0 -0
  100. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_scaffolding.scssc +0 -0
  101. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tables.scssc +0 -0
  102. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_thumbnails.scssc +0 -0
  103. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tooltip.scssc +0 -0
  104. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_type.scssc +0 -0
  105. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_utilities.scssc +0 -0
  106. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_variables.scssc +0 -0
  107. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_wells.scssc +0 -0
  108. data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap-sprockets.scssc +0 -0
  109. data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap.scssc +0 -0
  110. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_blacklight_base.scssc +0 -0
  111. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_bookmark.scssc +0 -0
  112. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_catalog.scssc +0 -0
  113. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_facets.scssc +0 -0
  114. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_group.scssc +0 -0
  115. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_header.scssc +0 -0
  116. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_layout.scssc +0 -0
  117. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_modal.scssc +0 -0
  118. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_search_history.scssc +0 -0
  119. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight.scssc +0 -0
  120. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight_defaults.scssc +0 -0
  121. data/spec/internal/tmp/cache/assets/test/sprockets/059eceaa18e2eaee3d5168f3949f4957 +0 -0
  122. data/spec/internal/tmp/cache/assets/test/sprockets/093d898bf3f20b26b13c82714a77c040 +0 -0
  123. data/spec/internal/tmp/cache/assets/test/sprockets/10517c9579f8d23c82fb8caa66dca6a7 +0 -0
  124. data/spec/internal/tmp/cache/assets/test/sprockets/13fe41fee1fe35b49d145bcc06610705 +0 -0
  125. data/spec/internal/tmp/cache/assets/test/sprockets/19f4ba6fa86d2609c171da9f5ee3b11a +0 -0
  126. data/spec/internal/tmp/cache/assets/test/sprockets/2bec18569db80effd80a2a19a038100b +0 -0
  127. data/spec/internal/tmp/cache/assets/test/sprockets/2f5173deea6c795b8fdde723bb4b63af +0 -0
  128. data/spec/internal/tmp/cache/assets/test/sprockets/30ca7caa200fa8eaddaef443913ad9ad +0 -0
  129. data/spec/internal/tmp/cache/assets/test/sprockets/350a65d446eb7398f96d102f7a1a6d69 +0 -0
  130. data/spec/internal/tmp/cache/assets/test/sprockets/357970feca3ac29060c1e3861e2c0953 +0 -0
  131. data/spec/internal/tmp/cache/assets/test/sprockets/3cc20a63495e66e9251101ccfb0d389c +0 -0
  132. data/spec/internal/tmp/cache/assets/test/sprockets/4052820c15af72ba690230a0f92bd75e +0 -0
  133. data/spec/internal/tmp/cache/assets/test/sprockets/41b62fb10530766b5b9fef3338f5f30b +0 -0
  134. data/spec/internal/tmp/cache/assets/test/sprockets/421c89c58d3e76046a0294dae1f731ff +0 -0
  135. data/spec/internal/tmp/cache/assets/test/sprockets/43da966542fafc2a4c768be78527391c +0 -0
  136. data/spec/internal/tmp/cache/assets/test/sprockets/496a0d7dce1ff6bf4a9c3a089ea3a635 +0 -0
  137. data/spec/internal/tmp/cache/assets/test/sprockets/4e93570db72bb61820711448b78d548d +0 -0
  138. data/spec/internal/tmp/cache/assets/test/sprockets/50b9db0b908b421a9b941a445dbaeacc +0 -0
  139. data/spec/internal/tmp/cache/assets/test/sprockets/5155772cce704d83a64aa83eeda20c5a +0 -0
  140. data/spec/internal/tmp/cache/assets/test/sprockets/528c628cf107f8be6dd122e1154344be +0 -0
  141. data/spec/internal/tmp/cache/assets/test/sprockets/5ed02c7072bb024fa0832a1ca4113227 +0 -0
  142. data/spec/internal/tmp/cache/assets/test/sprockets/60141dc3d16c502963c3d798a099862d +0 -0
  143. data/spec/internal/tmp/cache/assets/test/sprockets/6df5b5c0a8e6d87ced9504a460df875e +0 -0
  144. data/spec/internal/tmp/cache/assets/test/sprockets/6eefd2765a716d66ff857cfcda69a119 +0 -0
  145. data/spec/internal/tmp/cache/assets/test/sprockets/813e23719129cf9f19349f720bebdf70 +0 -0
  146. data/spec/internal/tmp/cache/assets/test/sprockets/88ded10f28177bec1bddb0d72d6b1cd2 +0 -0
  147. data/spec/internal/tmp/cache/assets/test/sprockets/8ad5c3dc0f5fcc5d7a775d6294dda9f5 +0 -0
  148. data/spec/internal/tmp/cache/assets/test/sprockets/8edfca9082e02111be92e79000667f22 +0 -0
  149. data/spec/internal/tmp/cache/assets/test/sprockets/90b54a819800edfa41b67722d1561040 +0 -0
  150. data/spec/internal/tmp/cache/assets/test/sprockets/93aaa9231a32901266b05632f3d35ecd +0 -0
  151. data/spec/internal/tmp/cache/assets/test/sprockets/9c653367feff82588eb6041d783a5809 +0 -0
  152. data/spec/internal/tmp/cache/assets/test/sprockets/9ce190f082dc4fb74bddb818cc011ce6 +0 -0
  153. data/spec/internal/tmp/cache/assets/test/sprockets/9f68eb44ffcb9dd95623c179300414fd +0 -0
  154. data/spec/internal/tmp/cache/assets/test/sprockets/ab2a05b91d7d316d4ceb47ce067006fb +0 -0
  155. data/spec/internal/tmp/cache/assets/test/sprockets/ad4a54c43c2a4c3874bfde1c9f08c248 +0 -0
  156. data/spec/internal/tmp/cache/assets/test/sprockets/b1a42de74c934edd0e5eed0f98a23597 +0 -0
  157. data/spec/internal/tmp/cache/assets/test/sprockets/b1c03c36188b4f58f819510adb9a7e9c +0 -0
  158. data/spec/internal/tmp/cache/assets/test/sprockets/b225d67626669cb154cbe08a3c439d48 +0 -0
  159. data/spec/internal/tmp/cache/assets/test/sprockets/b35e12934e9f05662777579549e31cd7 +0 -0
  160. data/spec/internal/tmp/cache/assets/test/sprockets/b3ae9e3cdd9991d7103b0a00e33b5778 +0 -0
  161. data/spec/internal/tmp/cache/assets/test/sprockets/b42c247628daaf44109584724682a6ad +0 -0
  162. data/spec/internal/tmp/cache/assets/test/sprockets/b660ec1ede271559cfd0259eed96ae9c +0 -0
  163. data/spec/internal/tmp/cache/assets/test/sprockets/ba078b29a7f067b3acdd538a286235a8 +0 -0
  164. data/spec/internal/tmp/cache/assets/test/sprockets/bb108ef3fc4c96d1c20cc41f97d943a0 +0 -0
  165. data/spec/internal/tmp/cache/assets/test/sprockets/bb92f4b8c1bedafe60b6f226b549138a +0 -0
  166. data/spec/internal/tmp/cache/assets/test/sprockets/c644ecad928e076f999e9c9a3bc350c9 +0 -0
  167. data/spec/internal/tmp/cache/assets/test/sprockets/c78dd20a0df27e9394a413bf2e4abf92 +0 -0
  168. data/spec/internal/tmp/cache/assets/test/sprockets/c9efce42580b0ad0374a052aa61f2a07 +0 -0
  169. data/spec/internal/tmp/cache/assets/test/sprockets/cffd775d018f68ce5dba1ee0d951a994 +0 -0
  170. data/spec/internal/tmp/cache/assets/test/sprockets/d1bce36d6e3ab792562b9c929f2ce897 +0 -0
  171. data/spec/internal/tmp/cache/assets/test/sprockets/d24a7c4b8ac71a90e809881a64970898 +0 -0
  172. data/spec/internal/tmp/cache/assets/test/sprockets/d771ace226fc8215a3572e0aa35bb0d6 +0 -0
  173. data/spec/internal/tmp/cache/assets/test/sprockets/db0d9534ecdebba33af5d1d81060e855 +0 -0
  174. data/spec/internal/tmp/cache/assets/test/sprockets/dbba4bbc32c17ade3d618c5d0baeb371 +0 -0
  175. data/spec/internal/tmp/cache/assets/test/sprockets/e8276b98892dcac8285a592bb63fe75b +0 -0
  176. data/spec/internal/tmp/cache/assets/test/sprockets/e9f7ccc553ce1a217709cc7a08cfb032 +0 -0
  177. data/spec/internal/tmp/cache/assets/test/sprockets/ecfc314951b349e27742c1b26880619e +0 -0
  178. data/spec/internal/tmp/cache/assets/test/sprockets/eddae0741d973a8ab3df27c1ea5b635e +0 -0
  179. data/spec/internal/tmp/cache/assets/test/sprockets/f274b5f22db177b6464b50691d531688 +0 -0
  180. data/spec/internal/tmp/cache/assets/test/sprockets/f3935581a84aba9a499005ed6a8e18be +0 -0
  181. data/spec/internal/tmp/cache/assets/test/sprockets/f3e7729c53b8a2c408086a61b58922ae +0 -0
  182. data/spec/internal/tmp/cache/assets/test/sprockets/f45dfdb34e84d3c72a3319399a7316a3 +0 -0
  183. data/spec/internal/tmp/cache/assets/test/sprockets/f74af03e26e893af40b07a6d0f970cd4 +0 -0
  184. data/spec/internal/tmp/cache/assets/test/sprockets/f7cbd26ba1d28d48de824f0e94586655 +0 -0
  185. data/spec/internal/tmp/cache/assets/test/sprockets/fa11f1beb15b0f04503ce1f06701a8bf +0 -0
  186. data/spec/internal/tmp/cache/assets/test/sprockets/faac2909046aa87e1f1f08b2bf0ad2b2 +0 -0
  187. data/spec/lib/krikri/enricher_spec.rb +130 -0
  188. data/spec/lib/krikri/enrichments/dedup_values_spec.rb~ +10 -0
  189. data/spec/lib/krikri/enrichments/genre_filter_spec.rb +21 -0
  190. data/spec/lib/krikri/enrichments/remove_empty_fields_spec.rb +1 -0
  191. data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb +28 -0
  192. data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb~ +16 -0
  193. data/spec/lib/krikri/enrichments/strip_html_spec.rb +1 -0
  194. data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb +16 -0
  195. data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb~ +16 -0
  196. data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb +16 -0
  197. data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb~ +16 -0
  198. data/spec/lib/krikri/enrichments/strip_punctuation_spec.rb +1 -0
  199. data/spec/lib/krikri/enrichments/strip_whitespace_spec.rb +1 -0
  200. data/spec/lib/krikri/enrichments/timespan_split_spec.rb +15 -0
  201. data/spec/lib/krikri/harvesters/api_harvester_spec.rb +98 -0
  202. data/spec/lib/krikri/harvesters/api_harvester_spec.rb~ +47 -0
  203. data/spec/lib/krikri/harvesters/mdl_api_harvester_spec.rb~ +11 -0
  204. data/spec/lib/krikri/harvesters/mdl_harvester_spec.rb~ +8 -0
  205. data/spec/lib/krikri/harvesters/oai_harvester_spec.rb +12 -6
  206. data/spec/lib/krikri/job_spec.rb +40 -0
  207. data/spec/lib/krikri/mapper_agent_spec.rb +10 -6
  208. data/spec/lib/krikri/util/extended_date_parser_spec.rb +14 -2
  209. data/spec/models/search_index_document_spec.rb +8 -0
  210. data/spec/spec_helper.rb +5 -0
  211. data/spec/support/shared_examples/harvester.rb +8 -1
  212. data/spec/support/shared_examples/rdf_source.rb +22 -0
  213. data/spec/support/shared_examples/string_enrichment.rb +13 -4
  214. metadata +365 -21
  215. data/lib/krikri/enrichments/capitalize.rb~ +0 -24
  216. data/lib/krikri/enrichments/genre_filter.rb~ +0 -9
  217. data/lib/krikri/enrichments/iso_enrcich.rb~ +0 -8
  218. data/lib/krikri/enrichments/iso_enrich.rb~ +0 -13
  219. data/lib/krikri/enrichments/parse_date.rb~ +0 -10
  220. data/lib/krikri/enrichments/strip_html.rb~ +0 -16
  221. data/lib/krikri/enrichments/timespan.rb~ +0 -6
  222. data/lib/krikri/enrichments/timespan_split.rb~ +0 -83
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6e3513b7b316e31b65b2779273dec1985fadf37
4
- data.tar.gz: d094a7b90fcdcf4e96ce47ab36a3f0db8474a293
3
+ metadata.gz: 8ba2b247b82e5f0d8a3f52adb0e3242d41deff3b
4
+ data.tar.gz: 429288310bd3531d4fbb20881d7a17983c69a015
5
5
  SHA512:
6
- metadata.gz: 215e8a8e7b6e2118b444c8699b03c37ba67aed42a8b04cd94569dfb11be91d2cff230deb668885a120f3203f38dd865c77878e27d60782ea7bddc36d8f57a6d1
7
- data.tar.gz: cb1e47c7e94ff570492d12fb4ba21c4d383ffff45bbf167d16aa1f252559fd166c3cc0ab457737bf314685bd2dcdc10cd18bec562cf6f9c0f5029c860abaa2ea
6
+ metadata.gz: 783c38573ddf5cac189f56220981aeba2768c1eba0a5e4d873d36642f4e633ca9e3ef449ba14ceae49d58b0bc05e426c9b5b84708f0ed7e9788ec305f3416464
7
+ data.tar.gz: 817df4eee63756335abc562d8badc40160d32d83a92aa5fde93e32ba17ca2938b2b311eed93cf8469756939f6551afd3b0bca25ca2b269f504d6b21b258457b8
@@ -80,5 +80,19 @@ module Krikri
80
80
 
81
81
  config.solr_document_model = Krikri::SearchIndexDocument
82
82
  end
83
+
84
+ ##
85
+ # Construct a valid item URI from a local name, and use it to fetch a single
86
+ # document from the search index.
87
+ # Override method in Blacklight::SolrHelper.
88
+ # TODO: This method is deprecated in Blacklight v5.10.
89
+ # TODO: Write appropriate test for this functionality after it is updated
90
+ # with Blacklight v5.10.
91
+ # @param id [String] a local name.
92
+ def get_solr_response_for_doc_id(id=nil, extra_controller_params={})
93
+ id_uri = Krikri::Settings.marmotta.item_container << '/' << id
94
+ solr_response = solr_repository.find(id_uri, extra_controller_params)
95
+ [solr_response, solr_response.documents.first]
96
+ end
83
97
  end
84
98
  end
@@ -4,6 +4,16 @@ module Krikri
4
4
  # Represents a single document returned from a query to the search index.
5
5
  class SearchIndexDocument < SolrDocument
6
6
 
7
+ ##
8
+ # Use local name instead of full item id URI in route. For example, a
9
+ # document with the id 'http://dp.la/marmotta/ldp/items/123ab' will have an
10
+ # id param of '123ab'. This is necessary because routes that contain '.'
11
+ # are not valid.
12
+ # @return String
13
+ def to_param
14
+ self[self.class.unique_key].match(/[\/]([^\/]*)\z/)[1]
15
+ end
16
+
7
17
  ##
8
18
  # Get the aggregation, populated with data from Marmotta, which corresponds
9
19
  # to this SearchIndexDocument
data/lib/krikri/engine.rb CHANGED
@@ -5,6 +5,7 @@ require 'krikri/ldp'
5
5
  require 'dpla/map'
6
6
  require 'rdf/marmotta'
7
7
  require 'oai/client'
8
+ require 'rest-client'
8
9
  require 'edtf'
9
10
 
10
11
  require 'resque'
@@ -0,0 +1,156 @@
1
+ module Krikri
2
+ ##
3
+ # A SoftwareAgent that runs enrichment processes.
4
+ #
5
+ # @example
6
+ #
7
+ # To enrich records that were mapped by the mapping activity with ID 3:
8
+ #
9
+ # # Define which enrichments are run, and their parameters:
10
+ # chain = {
11
+ # 'Krikri::Enrichments::StripHtml' => {
12
+ # input_fields: [{sourceResource: :title}]
13
+ # },
14
+ # 'Krikri::Enrichments::StripWhitespace' => {
15
+ # input_fields: [{sourceResource: :title}]
16
+ # }
17
+ # }
18
+ # Krikri::Enricher.enqueue({
19
+ # generator_uri: 'http://ldp.local.dp.la/ldp/activity/3',
20
+ # chain: chain
21
+ # })
22
+ #
23
+ # @see Krikri::SoftwareAgent#enqueue
24
+ # @see Krikri::Enrichment
25
+ #
26
+ class Enricher
27
+ include SoftwareAgent
28
+
29
+ attr_reader :chain, :generator_uri
30
+
31
+ def self.queue_name
32
+ :enrichment
33
+ end
34
+
35
+ ##
36
+ # Create a new Enricher, given a hash of options:
37
+ # generator_uri: the LDP URI of the Activity that generated the mapped
38
+ # records that this one will enrich.
39
+ # chain: a hash specifying the input_fields and output_fields, as
40
+ # illustrated above, which will be passed to the Enrichment.
41
+ #
42
+ # @see Krikri::Enrichment
43
+ # @param opts [Hash] a hash of options
44
+ def initialize(opts = {})
45
+ @generator_uri = RDF::URI(opts.fetch(:generator_uri))
46
+ @chain = deep_sym(opts.fetch(:chain) { {} })
47
+ end
48
+
49
+ ##
50
+ # Run the enrichment.
51
+ #
52
+ # Take each record that was affected by the activity defined by our
53
+ # instantiation, and apply each enrichment from the enrichment chain.
54
+ #
55
+ def run(activity_uri = nil)
56
+ log :info, 'enricher is running'
57
+ # see TODO below
58
+ target_aggregations.each do |agg|
59
+ begin
60
+ chain_enrichments!(agg)
61
+ activity_uri ? agg.save_with_provenance(activity_uri) : agg.save
62
+ rescue => e
63
+ log :error, "Enrichment error: #{e.message}\n#{e.backtrace}"
64
+ end
65
+ end
66
+ log :info, 'enricher is done'
67
+ end
68
+
69
+ # TODO: remove this when the current topic branch that introduces the
70
+ # EntityConsumer mixin has been merged.
71
+ def target_aggregations
72
+ query = Krikri::ProvenanceQueryClient.find_by_activity(generator_uri)
73
+ query.execute.lazy.flat_map do |solution|
74
+ agg = DPLA::MAP::Aggregation.new(solution.record.to_s)
75
+ agg.get
76
+ agg
77
+ end
78
+ end
79
+
80
+ ##
81
+ # Given an aggregation, take each enrichment specified by the `chain'
82
+ # given in our instantiation, and apply that enrichment, with the given
83
+ # options, modifying the aggregation in-place.
84
+ #
85
+ def chain_enrichments!(agg)
86
+ chain.keys.each do |e|
87
+ enrichment = e.to_s.constantize.new
88
+ if enrichment.is_a? Krikri::FieldEnrichment
89
+ agg = do_field_enrichment(agg, enrichment, chain[e])
90
+ else
91
+ agg = do_basic_enrichment(agg, enrichment, chain[e])
92
+ end
93
+ end
94
+ end
95
+
96
+ private
97
+
98
+ ##
99
+ # Perform a default enrichment, using Enrichment#enrichment or a derived
100
+ # class that expects the same arguments.
101
+ #
102
+ # @param agg [DPLA::MAP::Aggregation]
103
+ # @param enrichment [Krikri::Enrichment]
104
+ # @param options [Hash]
105
+ #
106
+ # @see Krikri::Enrichment
107
+ #
108
+ def do_basic_enrichment(agg, enrichment, options)
109
+ enrichment.enrich(
110
+ agg, options[:input_fields], options[:output_fields]
111
+ )
112
+ end
113
+
114
+ ##
115
+ # Perform a FieldEnrichment enrichment on the given aggregation.
116
+ #
117
+ # With FieldEnrichment#enrich, the input_fields option parameter is passed
118
+ # as a variable arguments list
119
+ #
120
+ # @param agg [DPLA::MAP::Aggregation]
121
+ # @param enrichment [Krikri::FieldEnrichment]
122
+ # @param options [Hash] Hash with :input_fields containing variable
123
+ # arguments list
124
+ #
125
+ # @see Krikri::FieldEnrichment
126
+ #
127
+ def do_field_enrichment(agg, enrichment, options)
128
+ enrichment.enrich(agg, *options[:input_fields])
129
+ end
130
+
131
+ ##
132
+ # Transform the given hash recursively by turning all of its string keys
133
+ # and values into symbols.
134
+ #
135
+ # Symbols are expected in the enrichment classes, and we will usually be
136
+ # dealing with values that have been deserialized from JSON.
137
+ #
138
+ def deep_sym(obj)
139
+ if obj.is_a? Hash
140
+ return obj.inject({}) do |memo, (k, v)|
141
+ memo[k.to_sym] = deep_sym(v)
142
+ memo
143
+ end
144
+ elsif obj.is_a? Array
145
+ return obj.inject([]) do |memo, el|
146
+ memo << deep_sym(el)
147
+ memo
148
+ end
149
+ elsif obj.respond_to? :to_sym
150
+ return obj.to_sym
151
+ else
152
+ return nil
153
+ end
154
+ end
155
+ end
156
+ end
@@ -3,8 +3,6 @@ module Krikri
3
3
  # Mixin module for enriching a set of input_fields and setting the resulting
4
4
  # values to a set of output fields.
5
5
  module Enrichment
6
- extend SoftwareAgent
7
-
8
6
  ##
9
7
  # The main enrichment method; passes specified input fields to
10
8
  # #enrich_values, which must return an array of values with length equal to
@@ -1,10 +1,7 @@
1
1
  module Krikri::Enrichments
2
2
  ##
3
- # Enrichment to remove duplicate values
4
3
  #
5
- #
6
- class Deduplication
4
+ class DedupValues
7
5
  include Krikri::FieldEnrichment
8
-
9
6
  end
10
7
  end
@@ -0,0 +1,45 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Enrichment to remove values that are not allowed genre terms.
4
+ #
5
+ # GenreFilter.new.enrich_value('Book') => 'Book'
6
+ # GenreFilter.new.enrich_value('not a book') => nil
7
+ #
8
+ # Allowed genre terms are:
9
+ #
10
+ # - Book
11
+ # - Film/Video
12
+ # - Manuscript
13
+ # - Maps
14
+ # - Music
15
+ # - Musical Score
16
+ # - Newspapers
17
+ # - Nonmusic
18
+ # - Photograph/Pictorial Works
19
+ # - Serial
20
+ #
21
+ # Removes all non-string values
22
+ class GenreFilter
23
+ include Krikri::FieldEnrichment
24
+
25
+ TERMS = ['Book',
26
+ 'Film/Video',
27
+ 'Manuscript',
28
+ 'Maps',
29
+ 'Music',
30
+ 'Musical Score',
31
+ 'Newspapers',
32
+ 'Nonmusic',
33
+ 'Photograph/Pictorial Works',
34
+ 'Serial']
35
+
36
+ def enrich_value(value)
37
+ return nil unless value.is_a? String
38
+ term = TERMS.select do |t|
39
+ t.downcase.gsub(/[^a-zA-Z]/, '') ==
40
+ value.downcase.gsub(/[^a-zA-Z]/, '')
41
+ end
42
+ term.empty? ? nil : term.first
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,21 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip ending punctuation
4
+ #
5
+ # StripEndingPunctuation.new
6
+ # .enrich_value("moomin!...!;,.")
7
+ # # => "moomin"
8
+ #
9
+ # Leaves quotation marks and closing parentheses & brackets. Also
10
+ # leaves periods when they follow a one or two letter abbreviation.
11
+ class StripEndingPunctuation
12
+ include Krikri::FieldEnrichment
13
+
14
+ def enrich_value(value)
15
+ return value unless value.is_a? String
16
+ value.gsub!(/[^\p{Alnum}\'\"\)\]\}]*$/, '') unless
17
+ value.match /\s*[a-z]{1,2}\.$/i
18
+ value
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,18 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip Ending punctuation
4
+ #
5
+ # StripPunctuation.new
6
+ # .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
7
+ # # => "\tmoominpapa;:;:; moominmama! ...\n"
8
+ #
9
+ # Leaves quotation marks.
10
+ class StripLeadingPunctuation
11
+ include Krikri::FieldEnrichment
12
+
13
+ def enrich_value(value)
14
+ return value unless value.is_a? String
15
+ value.gsub(/[^\p{Alnum}\'\"\s]*/, '')
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,15 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading colons and semicolons
4
+ #
5
+ # StripLeadingColons.new.enrich_value(";:\tmoominpa()pa;;;")
6
+ # # => "\tmoominpa()pa;;;"
7
+ class StripLeadingColons
8
+ include Krikri::FieldEnrichment
9
+
10
+ def enrich_value(value)
11
+ return value unless value.is_a? String
12
+ value.gsub(/^[\;\:]*/, '')
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading colons
4
+ #
5
+ # StripPunctuation.new.enrich_value(";:\tmoominpa()pa;;;")
6
+ # # => "\tmoominpa()pa;;;"
7
+ class StripLeadingColons
8
+ include Krikri::FieldEnrichment
9
+
10
+ def enrich_value(value)
11
+ return value unless value.is_a? String
12
+ value.gsub(/^[\;\:]*/, '')
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,18 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading punctuation
4
+ #
5
+ # StripLeadingPunctuation.new
6
+ # .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
7
+ # # => "\tmoominpapa;:;:; moominmama! ...\n"
8
+ #
9
+ # Leaves quotation marks.
10
+ class StripLeadingPunctuation
11
+ include Krikri::FieldEnrichment
12
+
13
+ def enrich_value(value)
14
+ return value unless value.is_a? String
15
+ value.gsub(/^[^\p{Alnum}\'\"\s]*/, '')
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,18 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading punctuation
4
+ #
5
+ # StripPunctuation.new
6
+ # .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
7
+ # # => "\tmoominpapa;:;:; moominmama! ...\n"
8
+ #
9
+ # Leaves quotation marks.
10
+ class StripLeadingPunctuation
11
+ include Krikri::FieldEnrichment
12
+
13
+ def enrich_value(value)
14
+ return value unless value.is_a? String
15
+ value.gsub(/^[^\p{Alnum}\'\"\s]*/, '')
16
+ end
17
+ end
18
+ end
@@ -28,18 +28,40 @@ module Krikri::Enrichments
28
28
  class TimespanSplit
29
29
  include Krikri::FieldEnrichment
30
30
 
31
+ ##
32
+ # Enrich a `DPLA::MAP::TimeSpan` object or string value with `begin` and
33
+ # `end` values.
34
+ #
35
+ # @param value [DPLA::MAP::TimeSpan, String, Object]
36
+ #
37
+ # @return [Object] a new `TimeSpan` object containing the providedLabel
38
+ # and the enriched begin/end; if given a value other than a `TimeSpan`
39
+ # or `String` returns that value.
31
40
  def enrich_value(value)
32
41
  value = timespan_from_string(value) if value.is_a? String
33
42
  return value unless value.is_a? DPLA::MAP::TimeSpan
34
43
  populate_timespan(value)
35
44
  end
36
45
 
46
+ ##
47
+ # Converts a string to a `DPLA::MAP::TimeSpan` with the string as
48
+ # `providedLabel`.
49
+ #
50
+ # @param value [String] a string value containing a date, time, or timespan
51
+ #
52
+ # @return [DPLA::MAP::TimeSpan] a new, empty timespan with `providedLabel`
37
53
  def timespan_from_string(value)
38
54
  timespan = DPLA::MAP::TimeSpan.new
39
55
  timespan.providedLabel = value
40
56
  timespan
41
57
  end
42
58
 
59
+ ##
60
+ # Populates a timespan with a begin and end date.
61
+ #
62
+ # @param timespan [DPLA::MAP::TimeSpan]
63
+ #
64
+ # @return [DPLA::MAP::TimeSpan]
43
65
  def populate_timespan(timespan)
44
66
  return timespan unless (timespan.begin.empty? || timespan.end.empty?) &&
45
67
  !timespan.providedLabel.empty?
@@ -55,17 +77,37 @@ module Krikri::Enrichments
55
77
  return timespan
56
78
  end
57
79
 
80
+ ##
81
+ # @return [Array<Date, EDTF::Interval>]
58
82
  def parse_labels(labels)
59
83
  labels.map { |l| Krikri::Util::ExtendedDateParser.parse(l, true) }.compact
60
84
  end
61
85
 
86
+ ##
87
+ # Converts an EDTF date to a begin and end date.
88
+ #
89
+ # @param date [Date, DateTime, EDTF::Interval] a date, with or without EDTF
90
+ # precision features; or an interval.
91
+ #
92
+ # @return [Array<Date, DateTime>] an array of two elements containing the
93
+ # begin and end dates.
62
94
  def span_from_date(date)
63
95
  return [nil, nil] if date.nil?
64
- return [date, date] if date.is_a? Date
96
+ if date.is_a?(Date)
97
+ return [date, date] if date.precision == :day
98
+ return [date, (date.succ - 1)]
99
+ end
65
100
  [(date.respond_to?(:first) ? date.first : date.from),
66
101
  (date.respond_to?(:last) ? date.last : date.to)]
67
102
  end
68
103
 
104
+ ##
105
+ # Reduces a timespan with multiple begin or end dates to a single earliest
106
+ # begin date and a single latest end date.
107
+ #
108
+ # @param timespan [DPLA::MAP::TimeSpan] the timespan to reduce
109
+ #
110
+ # @return [DPLA::MAP::TimeSpan] an updated timespan
69
111
  def reduce_to_largest_span(timespan)
70
112
  timespan.begin = timespan.begin.sort.first
71
113
  timespan.end = timespan.end.sort.last