krikri 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/krikri/records_controller.rb +14 -0
  3. data/app/models/krikri/search_index_document.rb +10 -0
  4. data/lib/krikri/engine.rb +1 -0
  5. data/lib/krikri/enricher.rb +156 -0
  6. data/lib/krikri/enrichment.rb +0 -2
  7. data/lib/krikri/enrichments/{deduplication.rb~ → dedup_values.rb~} +1 -4
  8. data/lib/krikri/enrichments/genre_filter.rb +45 -0
  9. data/lib/krikri/enrichments/strip_ending_punctuation.rb +21 -0
  10. data/lib/krikri/enrichments/strip_ending_punctuation.rb~ +18 -0
  11. data/lib/krikri/enrichments/strip_leading_colons.rb +15 -0
  12. data/lib/krikri/enrichments/strip_leading_colons.rb~ +15 -0
  13. data/lib/krikri/enrichments/strip_leading_punctuation.rb +18 -0
  14. data/lib/krikri/enrichments/strip_leading_punctuation.rb~ +18 -0
  15. data/lib/krikri/enrichments/timespan_split.rb +43 -1
  16. data/lib/krikri/harvesters/api_harvester.rb +164 -0
  17. data/lib/krikri/harvesters/api_harvester.rb~ +44 -0
  18. data/lib/krikri/harvesters/mdl_api_harvester.rb~ +7 -0
  19. data/lib/krikri/harvesters/mdl_harvester.rb~ +4 -0
  20. data/lib/krikri/harvesters/oai_harvester.rb +8 -0
  21. data/lib/krikri/ldp/rdf_source.rb +30 -0
  22. data/lib/krikri/mapper.rb +1 -3
  23. data/lib/krikri/provenance_query_client.rb +11 -1
  24. data/lib/krikri/util/extended_date_parser.rb +46 -11
  25. data/lib/krikri/version.rb +1 -1
  26. data/spec/internal/Gemfile.lock +12 -12
  27. data/spec/internal/config/initializers/blacklight_initializer.rb +1 -1
  28. data/spec/internal/config/initializers/devise.rb +2 -2
  29. data/spec/internal/config/secrets.yml +2 -2
  30. data/spec/internal/db/development.sqlite3 +0 -0
  31. data/spec/internal/db/migrate/{20150310190235_devise_create_users.rb → 20150320153132_devise_create_users.rb} +0 -0
  32. data/spec/internal/db/migrate/{20150310190253_create_searches.blacklight.rb → 20150320153151_create_searches.blacklight.rb} +0 -0
  33. data/spec/internal/db/migrate/{20150310190254_create_bookmarks.blacklight.rb → 20150320153152_create_bookmarks.blacklight.rb} +0 -0
  34. data/spec/internal/db/migrate/{20150310190255_add_polymorphic_type_to_bookmarks.blacklight.rb → 20150320153153_add_polymorphic_type_to_bookmarks.blacklight.rb} +0 -0
  35. data/spec/internal/db/schema.rb +1 -1
  36. data/spec/internal/db/test.sqlite3 +0 -0
  37. data/spec/internal/log/development.log +75 -73
  38. data/spec/internal/log/test.log +85076 -0
  39. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_alerts.scssc +0 -0
  40. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_background-variant.scssc +0 -0
  41. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_border-radius.scssc +0 -0
  42. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_buttons.scssc +0 -0
  43. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_center-block.scssc +0 -0
  44. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_clearfix.scssc +0 -0
  45. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_forms.scssc +0 -0
  46. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_gradients.scssc +0 -0
  47. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid-framework.scssc +0 -0
  48. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid.scssc +0 -0
  49. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_hide-text.scssc +0 -0
  50. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_image.scssc +0 -0
  51. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_labels.scssc +0 -0
  52. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_list-group.scssc +0 -0
  53. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-divider.scssc +0 -0
  54. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-vertical-align.scssc +0 -0
  55. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_opacity.scssc +0 -0
  56. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_pagination.scssc +0 -0
  57. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_panels.scssc +0 -0
  58. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_progress-bar.scssc +0 -0
  59. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_reset-filter.scssc +0 -0
  60. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_resize.scssc +0 -0
  61. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_responsive-visibility.scssc +0 -0
  62. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_size.scssc +0 -0
  63. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_tab-focus.scssc +0 -0
  64. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_table-row.scssc +0 -0
  65. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-emphasis.scssc +0 -0
  66. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-overflow.scssc +0 -0
  67. data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_vendor-prefixes.scssc +0 -0
  68. data/spec/internal/tmp/cache/assets/test/sass/93e201cf4a11978a1f491a057a3bd569c3825210/blacklight.css.scssc +0 -0
  69. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_alerts.scssc +0 -0
  70. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_badges.scssc +0 -0
  71. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_breadcrumbs.scssc +0 -0
  72. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_button-groups.scssc +0 -0
  73. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_buttons.scssc +0 -0
  74. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_carousel.scssc +0 -0
  75. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_close.scssc +0 -0
  76. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_code.scssc +0 -0
  77. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_component-animations.scssc +0 -0
  78. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_dropdowns.scssc +0 -0
  79. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_forms.scssc +0 -0
  80. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_glyphicons.scssc +0 -0
  81. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_grid.scssc +0 -0
  82. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_input-groups.scssc +0 -0
  83. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_jumbotron.scssc +0 -0
  84. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_labels.scssc +0 -0
  85. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_list-group.scssc +0 -0
  86. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_media.scssc +0 -0
  87. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_mixins.scssc +0 -0
  88. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_modals.scssc +0 -0
  89. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navbar.scssc +0 -0
  90. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navs.scssc +0 -0
  91. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_normalize.scssc +0 -0
  92. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pager.scssc +0 -0
  93. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pagination.scssc +0 -0
  94. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_panels.scssc +0 -0
  95. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_popovers.scssc +0 -0
  96. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_print.scssc +0 -0
  97. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_progress-bars.scssc +0 -0
  98. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-embed.scssc +0 -0
  99. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-utilities.scssc +0 -0
  100. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_scaffolding.scssc +0 -0
  101. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tables.scssc +0 -0
  102. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_thumbnails.scssc +0 -0
  103. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tooltip.scssc +0 -0
  104. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_type.scssc +0 -0
  105. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_utilities.scssc +0 -0
  106. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_variables.scssc +0 -0
  107. data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_wells.scssc +0 -0
  108. data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap-sprockets.scssc +0 -0
  109. data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap.scssc +0 -0
  110. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_blacklight_base.scssc +0 -0
  111. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_bookmark.scssc +0 -0
  112. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_catalog.scssc +0 -0
  113. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_facets.scssc +0 -0
  114. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_group.scssc +0 -0
  115. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_header.scssc +0 -0
  116. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_layout.scssc +0 -0
  117. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_modal.scssc +0 -0
  118. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_search_history.scssc +0 -0
  119. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight.scssc +0 -0
  120. data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight_defaults.scssc +0 -0
  121. data/spec/internal/tmp/cache/assets/test/sprockets/059eceaa18e2eaee3d5168f3949f4957 +0 -0
  122. data/spec/internal/tmp/cache/assets/test/sprockets/093d898bf3f20b26b13c82714a77c040 +0 -0
  123. data/spec/internal/tmp/cache/assets/test/sprockets/10517c9579f8d23c82fb8caa66dca6a7 +0 -0
  124. data/spec/internal/tmp/cache/assets/test/sprockets/13fe41fee1fe35b49d145bcc06610705 +0 -0
  125. data/spec/internal/tmp/cache/assets/test/sprockets/19f4ba6fa86d2609c171da9f5ee3b11a +0 -0
  126. data/spec/internal/tmp/cache/assets/test/sprockets/2bec18569db80effd80a2a19a038100b +0 -0
  127. data/spec/internal/tmp/cache/assets/test/sprockets/2f5173deea6c795b8fdde723bb4b63af +0 -0
  128. data/spec/internal/tmp/cache/assets/test/sprockets/30ca7caa200fa8eaddaef443913ad9ad +0 -0
  129. data/spec/internal/tmp/cache/assets/test/sprockets/350a65d446eb7398f96d102f7a1a6d69 +0 -0
  130. data/spec/internal/tmp/cache/assets/test/sprockets/357970feca3ac29060c1e3861e2c0953 +0 -0
  131. data/spec/internal/tmp/cache/assets/test/sprockets/3cc20a63495e66e9251101ccfb0d389c +0 -0
  132. data/spec/internal/tmp/cache/assets/test/sprockets/4052820c15af72ba690230a0f92bd75e +0 -0
  133. data/spec/internal/tmp/cache/assets/test/sprockets/41b62fb10530766b5b9fef3338f5f30b +0 -0
  134. data/spec/internal/tmp/cache/assets/test/sprockets/421c89c58d3e76046a0294dae1f731ff +0 -0
  135. data/spec/internal/tmp/cache/assets/test/sprockets/43da966542fafc2a4c768be78527391c +0 -0
  136. data/spec/internal/tmp/cache/assets/test/sprockets/496a0d7dce1ff6bf4a9c3a089ea3a635 +0 -0
  137. data/spec/internal/tmp/cache/assets/test/sprockets/4e93570db72bb61820711448b78d548d +0 -0
  138. data/spec/internal/tmp/cache/assets/test/sprockets/50b9db0b908b421a9b941a445dbaeacc +0 -0
  139. data/spec/internal/tmp/cache/assets/test/sprockets/5155772cce704d83a64aa83eeda20c5a +0 -0
  140. data/spec/internal/tmp/cache/assets/test/sprockets/528c628cf107f8be6dd122e1154344be +0 -0
  141. data/spec/internal/tmp/cache/assets/test/sprockets/5ed02c7072bb024fa0832a1ca4113227 +0 -0
  142. data/spec/internal/tmp/cache/assets/test/sprockets/60141dc3d16c502963c3d798a099862d +0 -0
  143. data/spec/internal/tmp/cache/assets/test/sprockets/6df5b5c0a8e6d87ced9504a460df875e +0 -0
  144. data/spec/internal/tmp/cache/assets/test/sprockets/6eefd2765a716d66ff857cfcda69a119 +0 -0
  145. data/spec/internal/tmp/cache/assets/test/sprockets/813e23719129cf9f19349f720bebdf70 +0 -0
  146. data/spec/internal/tmp/cache/assets/test/sprockets/88ded10f28177bec1bddb0d72d6b1cd2 +0 -0
  147. data/spec/internal/tmp/cache/assets/test/sprockets/8ad5c3dc0f5fcc5d7a775d6294dda9f5 +0 -0
  148. data/spec/internal/tmp/cache/assets/test/sprockets/8edfca9082e02111be92e79000667f22 +0 -0
  149. data/spec/internal/tmp/cache/assets/test/sprockets/90b54a819800edfa41b67722d1561040 +0 -0
  150. data/spec/internal/tmp/cache/assets/test/sprockets/93aaa9231a32901266b05632f3d35ecd +0 -0
  151. data/spec/internal/tmp/cache/assets/test/sprockets/9c653367feff82588eb6041d783a5809 +0 -0
  152. data/spec/internal/tmp/cache/assets/test/sprockets/9ce190f082dc4fb74bddb818cc011ce6 +0 -0
  153. data/spec/internal/tmp/cache/assets/test/sprockets/9f68eb44ffcb9dd95623c179300414fd +0 -0
  154. data/spec/internal/tmp/cache/assets/test/sprockets/ab2a05b91d7d316d4ceb47ce067006fb +0 -0
  155. data/spec/internal/tmp/cache/assets/test/sprockets/ad4a54c43c2a4c3874bfde1c9f08c248 +0 -0
  156. data/spec/internal/tmp/cache/assets/test/sprockets/b1a42de74c934edd0e5eed0f98a23597 +0 -0
  157. data/spec/internal/tmp/cache/assets/test/sprockets/b1c03c36188b4f58f819510adb9a7e9c +0 -0
  158. data/spec/internal/tmp/cache/assets/test/sprockets/b225d67626669cb154cbe08a3c439d48 +0 -0
  159. data/spec/internal/tmp/cache/assets/test/sprockets/b35e12934e9f05662777579549e31cd7 +0 -0
  160. data/spec/internal/tmp/cache/assets/test/sprockets/b3ae9e3cdd9991d7103b0a00e33b5778 +0 -0
  161. data/spec/internal/tmp/cache/assets/test/sprockets/b42c247628daaf44109584724682a6ad +0 -0
  162. data/spec/internal/tmp/cache/assets/test/sprockets/b660ec1ede271559cfd0259eed96ae9c +0 -0
  163. data/spec/internal/tmp/cache/assets/test/sprockets/ba078b29a7f067b3acdd538a286235a8 +0 -0
  164. data/spec/internal/tmp/cache/assets/test/sprockets/bb108ef3fc4c96d1c20cc41f97d943a0 +0 -0
  165. data/spec/internal/tmp/cache/assets/test/sprockets/bb92f4b8c1bedafe60b6f226b549138a +0 -0
  166. data/spec/internal/tmp/cache/assets/test/sprockets/c644ecad928e076f999e9c9a3bc350c9 +0 -0
  167. data/spec/internal/tmp/cache/assets/test/sprockets/c78dd20a0df27e9394a413bf2e4abf92 +0 -0
  168. data/spec/internal/tmp/cache/assets/test/sprockets/c9efce42580b0ad0374a052aa61f2a07 +0 -0
  169. data/spec/internal/tmp/cache/assets/test/sprockets/cffd775d018f68ce5dba1ee0d951a994 +0 -0
  170. data/spec/internal/tmp/cache/assets/test/sprockets/d1bce36d6e3ab792562b9c929f2ce897 +0 -0
  171. data/spec/internal/tmp/cache/assets/test/sprockets/d24a7c4b8ac71a90e809881a64970898 +0 -0
  172. data/spec/internal/tmp/cache/assets/test/sprockets/d771ace226fc8215a3572e0aa35bb0d6 +0 -0
  173. data/spec/internal/tmp/cache/assets/test/sprockets/db0d9534ecdebba33af5d1d81060e855 +0 -0
  174. data/spec/internal/tmp/cache/assets/test/sprockets/dbba4bbc32c17ade3d618c5d0baeb371 +0 -0
  175. data/spec/internal/tmp/cache/assets/test/sprockets/e8276b98892dcac8285a592bb63fe75b +0 -0
  176. data/spec/internal/tmp/cache/assets/test/sprockets/e9f7ccc553ce1a217709cc7a08cfb032 +0 -0
  177. data/spec/internal/tmp/cache/assets/test/sprockets/ecfc314951b349e27742c1b26880619e +0 -0
  178. data/spec/internal/tmp/cache/assets/test/sprockets/eddae0741d973a8ab3df27c1ea5b635e +0 -0
  179. data/spec/internal/tmp/cache/assets/test/sprockets/f274b5f22db177b6464b50691d531688 +0 -0
  180. data/spec/internal/tmp/cache/assets/test/sprockets/f3935581a84aba9a499005ed6a8e18be +0 -0
  181. data/spec/internal/tmp/cache/assets/test/sprockets/f3e7729c53b8a2c408086a61b58922ae +0 -0
  182. data/spec/internal/tmp/cache/assets/test/sprockets/f45dfdb34e84d3c72a3319399a7316a3 +0 -0
  183. data/spec/internal/tmp/cache/assets/test/sprockets/f74af03e26e893af40b07a6d0f970cd4 +0 -0
  184. data/spec/internal/tmp/cache/assets/test/sprockets/f7cbd26ba1d28d48de824f0e94586655 +0 -0
  185. data/spec/internal/tmp/cache/assets/test/sprockets/fa11f1beb15b0f04503ce1f06701a8bf +0 -0
  186. data/spec/internal/tmp/cache/assets/test/sprockets/faac2909046aa87e1f1f08b2bf0ad2b2 +0 -0
  187. data/spec/lib/krikri/enricher_spec.rb +130 -0
  188. data/spec/lib/krikri/enrichments/dedup_values_spec.rb~ +10 -0
  189. data/spec/lib/krikri/enrichments/genre_filter_spec.rb +21 -0
  190. data/spec/lib/krikri/enrichments/remove_empty_fields_spec.rb +1 -0
  191. data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb +28 -0
  192. data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb~ +16 -0
  193. data/spec/lib/krikri/enrichments/strip_html_spec.rb +1 -0
  194. data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb +16 -0
  195. data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb~ +16 -0
  196. data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb +16 -0
  197. data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb~ +16 -0
  198. data/spec/lib/krikri/enrichments/strip_punctuation_spec.rb +1 -0
  199. data/spec/lib/krikri/enrichments/strip_whitespace_spec.rb +1 -0
  200. data/spec/lib/krikri/enrichments/timespan_split_spec.rb +15 -0
  201. data/spec/lib/krikri/harvesters/api_harvester_spec.rb +98 -0
  202. data/spec/lib/krikri/harvesters/api_harvester_spec.rb~ +47 -0
  203. data/spec/lib/krikri/harvesters/mdl_api_harvester_spec.rb~ +11 -0
  204. data/spec/lib/krikri/harvesters/mdl_harvester_spec.rb~ +8 -0
  205. data/spec/lib/krikri/harvesters/oai_harvester_spec.rb +12 -6
  206. data/spec/lib/krikri/job_spec.rb +40 -0
  207. data/spec/lib/krikri/mapper_agent_spec.rb +10 -6
  208. data/spec/lib/krikri/util/extended_date_parser_spec.rb +14 -2
  209. data/spec/models/search_index_document_spec.rb +8 -0
  210. data/spec/spec_helper.rb +5 -0
  211. data/spec/support/shared_examples/harvester.rb +8 -1
  212. data/spec/support/shared_examples/rdf_source.rb +22 -0
  213. data/spec/support/shared_examples/string_enrichment.rb +13 -4
  214. metadata +365 -21
  215. data/lib/krikri/enrichments/capitalize.rb~ +0 -24
  216. data/lib/krikri/enrichments/genre_filter.rb~ +0 -9
  217. data/lib/krikri/enrichments/iso_enrcich.rb~ +0 -8
  218. data/lib/krikri/enrichments/iso_enrich.rb~ +0 -13
  219. data/lib/krikri/enrichments/parse_date.rb~ +0 -10
  220. data/lib/krikri/enrichments/strip_html.rb~ +0 -16
  221. data/lib/krikri/enrichments/timespan.rb~ +0 -6
  222. data/lib/krikri/enrichments/timespan_split.rb~ +0 -83
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6e3513b7b316e31b65b2779273dec1985fadf37
4
- data.tar.gz: d094a7b90fcdcf4e96ce47ab36a3f0db8474a293
3
+ metadata.gz: 8ba2b247b82e5f0d8a3f52adb0e3242d41deff3b
4
+ data.tar.gz: 429288310bd3531d4fbb20881d7a17983c69a015
5
5
  SHA512:
6
- metadata.gz: 215e8a8e7b6e2118b444c8699b03c37ba67aed42a8b04cd94569dfb11be91d2cff230deb668885a120f3203f38dd865c77878e27d60782ea7bddc36d8f57a6d1
7
- data.tar.gz: cb1e47c7e94ff570492d12fb4ba21c4d383ffff45bbf167d16aa1f252559fd166c3cc0ab457737bf314685bd2dcdc10cd18bec562cf6f9c0f5029c860abaa2ea
6
+ metadata.gz: 783c38573ddf5cac189f56220981aeba2768c1eba0a5e4d873d36642f4e633ca9e3ef449ba14ceae49d58b0bc05e426c9b5b84708f0ed7e9788ec305f3416464
7
+ data.tar.gz: 817df4eee63756335abc562d8badc40160d32d83a92aa5fde93e32ba17ca2938b2b311eed93cf8469756939f6551afd3b0bca25ca2b269f504d6b21b258457b8
@@ -80,5 +80,19 @@ module Krikri
80
80
 
81
81
  config.solr_document_model = Krikri::SearchIndexDocument
82
82
  end
83
+
84
+ ##
85
+ # Construct a valid item URI from a local name, and use it to fetch a single
86
+ # document from the search index.
87
+ # Override method in Blacklight::SolrHelper.
88
+ # TODO: This method is deprecated in Blacklight v5.10.
89
+ # TODO: Write appropriate test for this functionality after it is updated
90
+ # with Blacklight v5.10.
91
+ # @param id [String] a local name.
92
+ def get_solr_response_for_doc_id(id=nil, extra_controller_params={})
93
+ id_uri = Krikri::Settings.marmotta.item_container << '/' << id
94
+ solr_response = solr_repository.find(id_uri, extra_controller_params)
95
+ [solr_response, solr_response.documents.first]
96
+ end
83
97
  end
84
98
  end
@@ -4,6 +4,16 @@ module Krikri
4
4
  # Represents a single document returned from a query to the search index.
5
5
  class SearchIndexDocument < SolrDocument
6
6
 
7
+ ##
8
+ # Use local name instead of full item id URI in route. For example, a
9
+ # document with the id 'http://dp.la/marmotta/ldp/items/123ab' will have an
10
+ # id param of '123ab'. This is necessary because routes that contain '.'
11
+ # are not valid.
12
+ # @return [String]
13
+ def to_param
14
+ self[self.class.unique_key].match(/[\/]([^\/]*)\z/)[1]
15
+ end
16
+
7
17
  ##
8
18
  # Get the aggregation, populated with data from Marmotta, which corresponds
9
19
  # to this SearchIndexDocument
data/lib/krikri/engine.rb CHANGED
@@ -5,6 +5,7 @@ require 'krikri/ldp'
5
5
  require 'dpla/map'
6
6
  require 'rdf/marmotta'
7
7
  require 'oai/client'
8
+ require 'rest-client'
8
9
  require 'edtf'
9
10
 
10
11
  require 'resque'
@@ -0,0 +1,156 @@
1
+ module Krikri
2
+ ##
3
+ # A SoftwareAgent that runs enrichment processes.
4
+ #
5
+ # @example
6
+ #
7
+ # To enrich records that were mapped by the mapping activity with ID 3:
8
+ #
9
+ # # Define which enrichments are run, and their parameters:
10
+ # chain = {
11
+ # 'Krikri::Enrichments::StripHtml' => {
12
+ # input_fields: [{sourceResource: :title}]
13
+ # },
14
+ # 'Krikri::Enrichments::StripWhitespace' => {
15
+ # input_fields: [{sourceResource: :title}]
16
+ # }
17
+ # }
18
+ # Krikri::Enricher.enqueue({
19
+ # generator_uri: 'http://ldp.local.dp.la/ldp/activity/3',
20
+ # chain: chain
21
+ # })
22
+ #
23
+ # @see Krikri::SoftwareAgent#enqueue
24
+ # @see Krikri::Enrichment
25
+ #
26
+ class Enricher
27
+ include SoftwareAgent
28
+
29
+ attr_reader :chain, :generator_uri
30
+
31
+ def self.queue_name
32
+ :enrichment
33
+ end
34
+
35
+ ##
36
+ # Create a new Enricher, given a hash of options:
37
+ # generator_uri: the LDP URI of the Activity that generated the mapped
38
+ # records that this one will enrich.
39
+ # chain: a hash specifying the input_fields and output_fields, as
40
+ # illustrated above, which will be passed to the Enrichment.
41
+ #
42
+ # @see Krikri::Enrichment
43
+ # @param opts [Hash] a hash of options
44
+ def initialize(opts = {})
45
+ @generator_uri = RDF::URI(opts.fetch(:generator_uri))
46
+ @chain = deep_sym(opts.fetch(:chain) { {} })
47
+ end
48
+
49
+ ##
50
+ # Run the enrichment.
51
+ #
52
+ # Take each record that was affected by the activity defined by our
53
+ # instantiation, and apply each enrichment from the enrichment chain.
54
+ #
55
+ def run(activity_uri = nil)
56
+ log :info, 'enricher is running'
57
+ # see TODO below
58
+ target_aggregations.each do |agg|
59
+ begin
60
+ chain_enrichments!(agg)
61
+ activity_uri ? agg.save_with_provenance(activity_uri) : agg.save
62
+ rescue => e
63
+ log :error, "Enrichment error: #{e.message}\n#{e.backtrace}"
64
+ end
65
+ end
66
+ log :info, 'enricher is done'
67
+ end
68
+
69
+ # TODO: remove this when the current topic branch that introduces the
70
+ # EntityConsumer mixin has been merged.
71
+ def target_aggregations
72
+ query = Krikri::ProvenanceQueryClient.find_by_activity(generator_uri)
73
+ query.execute.lazy.flat_map do |solution|
74
+ agg = DPLA::MAP::Aggregation.new(solution.record.to_s)
75
+ agg.get
76
+ agg
77
+ end
78
+ end
79
+
80
+ ##
81
+ # Given an aggregation, take each enrichment specified by the `chain'
82
+ # given in our instantiation, and apply that enrichment, with the given
83
+ # options, modifying the aggregation in-place.
84
+ #
85
+ def chain_enrichments!(agg)
86
+ chain.keys.each do |e|
87
+ enrichment = e.to_s.constantize.new
88
+ if enrichment.is_a? Krikri::FieldEnrichment
89
+ agg = do_field_enrichment(agg, enrichment, chain[e])
90
+ else
91
+ agg = do_basic_enrichment(agg, enrichment, chain[e])
92
+ end
93
+ end
94
+ end
95
+
96
+ private
97
+
98
+ ##
99
+ # Perform a default enrichment, using Enrichment#enrichment or a derived
100
+ # class that expects the same arguments.
101
+ #
102
+ # @param agg [DPLA::MAP::Aggregation]
103
+ # @param enrichment [Krikri::Enrichment]
104
+ # @param options [Hash]
105
+ #
106
+ # @see Krikri::Enrichment
107
+ #
108
+ def do_basic_enrichment(agg, enrichment, options)
109
+ enrichment.enrich(
110
+ agg, options[:input_fields], options[:output_fields]
111
+ )
112
+ end
113
+
114
+ ##
115
+ # Perform a FieldEnrichment enrichment on the given aggregation.
116
+ #
117
+ # With FieldEnrichment#enrich, the input_fields option parameter is passed
118
+ # as a variable arguments list
119
+ #
120
+ # @param agg [DPLA::MAP::Aggregation]
121
+ # @param enrichment [Krikri::FieldEnrichment]
122
+ # @param options [Hash] Hash with :input_fields containing variable
123
+ # arguments list
124
+ #
125
+ # @see Krikri::FieldEnrichment
126
+ #
127
+ def do_field_enrichment(agg, enrichment, options)
128
+ enrichment.enrich(agg, *options[:input_fields])
129
+ end
130
+
131
+ ##
132
+ # Transform the given hash recursively by turning all of its string keys
133
+ # and values into symbols.
134
+ #
135
+ # Symbols are expected in the enrichment classes, and we will usually be
136
+ # dealing with values that have been deserialized from JSON.
137
+ #
138
+ def deep_sym(obj)
139
+ if obj.is_a? Hash
140
+ return obj.inject({}) do |memo, (k, v)|
141
+ memo[k.to_sym] = deep_sym(v)
142
+ memo
143
+ end
144
+ elsif obj.is_a? Array
145
+ return obj.inject([]) do |memo, el|
146
+ memo << deep_sym(el)
147
+ memo
148
+ end
149
+ elsif obj.respond_to? :to_sym
150
+ return obj.to_sym
151
+ else
152
+ return nil
153
+ end
154
+ end
155
+ end
156
+ end
@@ -3,8 +3,6 @@ module Krikri
3
3
  # Mixin module for enriching a set of input_fields and setting the resulting
4
4
  # values to a set of output fields.
5
5
  module Enrichment
6
- extend SoftwareAgent
7
-
8
6
  ##
9
7
  # The main enrichment method; passes specified input fields to
10
8
  # #enrich_values, which must return an array of values with length equal to
@@ -1,10 +1,7 @@
1
1
  module Krikri::Enrichments
2
2
  ##
3
- # Enrichment to remove duplicate values
4
3
  #
5
- #
6
- class Deduplication
4
+ class DedupValues
7
5
  include Krikri::FieldEnrichment
8
-
9
6
  end
10
7
  end
@@ -0,0 +1,45 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Enrichment to filter out values that are not recognized genre terms.
4
+ #
5
+ # GenreFilter.new.enrich_value('Book') => 'Book'
6
+ # GenreFilter.new.enrich_value('not a book') => nil
7
+ #
8
+ # Allowed genre terms are:
9
+ #
10
+ # - Book
11
+ # - Film/Video
12
+ # - Manuscript
13
+ # - Maps
14
+ # - Music
15
+ # - Musical Score
16
+ # - Newspapers
17
+ # - Nonmusic
18
+ # - Photograph/Pictorial Works
19
+ # - Serial
20
+ #
21
+ # Removes all non-string values
22
+ class GenreFilter
23
+ include Krikri::FieldEnrichment
24
+
25
+ TERMS = ['Book',
26
+ 'Film/Video',
27
+ 'Manuscript',
28
+ 'Maps',
29
+ 'Music',
30
+ 'Musical Score',
31
+ 'Newspapers',
32
+ 'Nonmusic',
33
+ 'Photograph/Pictorial Works',
34
+ 'Serial']
35
+
36
+ def enrich_value(value)
37
+ return nil unless value.is_a? String
38
+ term = TERMS.select do |t|
39
+ t.downcase.gsub(/[^a-zA-Z]/, '') ==
40
+ value.downcase.gsub(/[^a-zA-Z]/, '')
41
+ end
42
+ term.empty? ? nil : term.first
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,21 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip ending punctuation
4
+ #
5
+ # StripEndingPunctuation.new
6
+ # .enrich_value("moomin!...!;,.")
7
+ # # => "moomin"
8
+ #
9
+ # Leaves quotation marks and closing parentheses & brackets. Also
10
+ # leaves periods when they follow a one or two letter abbreviation.
11
+ class StripEndingPunctuation
12
+ include Krikri::FieldEnrichment
13
+
14
+ def enrich_value(value)
15
+ return value unless value.is_a? String
16
+ value.gsub!(/[^\p{Alnum}\'\"\)\]\}]*$/, '') unless
17
+ value.match /\s*[a-z]{1,2}\.$/i
18
+ value
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,18 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip Ending punctuation
4
+ #
5
+ # StripLeadingPunctuation.new
6
+ # .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
7
+ # # => "\tmoominpapa;:;:; moominmama! ...\n"
8
+ #
9
+ # Leaves quotation marks.
10
+ class StripLeadingPunctuation
11
+ include Krikri::FieldEnrichment
12
+
13
+ def enrich_value(value)
14
+ return value unless value.is_a? String
15
+ value.gsub(/[^\p{Alnum}\'\"\s]*/, '')
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,15 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading colons
4
+ #
5
+ # StripLeadingColons.new.enrich_value(";:\tmoominpa()pa;;;")
6
+ # # => "\tmoominpa()pa;;;"
7
+ class StripLeadingColons
8
+ include Krikri::FieldEnrichment
9
+
10
+ def enrich_value(value)
11
+ return value unless value.is_a? String
12
+ value.gsub(/^[\;\:]*/, '')
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading colons
4
+ #
5
+ # StripLeadingColons.new.enrich_value(";:\tmoominpa()pa;;;")
6
+ # # => "\tmoominpa()pa;;;"
7
+ class StripLeadingColons
8
+ include Krikri::FieldEnrichment
9
+
10
+ def enrich_value(value)
11
+ return value unless value.is_a? String
12
+ value.gsub(/^[\;\:]*/, '')
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,18 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading punctuation
4
+ #
5
+ # StripLeadingPunctuation.new
6
+ # .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
7
+ # # => "\tmoominpapa;:;:; moominmama! ...\n"
8
+ #
9
+ # Leaves quotation marks.
10
+ class StripLeadingPunctuation
11
+ include Krikri::FieldEnrichment
12
+
13
+ def enrich_value(value)
14
+ return value unless value.is_a? String
15
+ value.gsub(/^[^\p{Alnum}\'\"\s]*/, '')
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,18 @@
1
+ module Krikri::Enrichments
2
+ ##
3
+ # Strip leading punctuation
4
+ #
5
+ # StripLeadingPunctuation.new
6
+ # .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
7
+ # # => "\tmoominpapa;:;:; moominmama! ...\n"
8
+ #
9
+ # Leaves quotation marks.
10
+ class StripLeadingPunctuation
11
+ include Krikri::FieldEnrichment
12
+
13
+ def enrich_value(value)
14
+ return value unless value.is_a? String
15
+ value.gsub(/^[^\p{Alnum}\'\"\s]*/, '')
16
+ end
17
+ end
18
+ end
@@ -28,18 +28,40 @@ module Krikri::Enrichments
28
28
  class TimespanSplit
29
29
  include Krikri::FieldEnrichment
30
30
 
31
+ ##
32
+ # Enrich a `DPLA::MAP::TimeSpan` object or string value with `begin` and
33
+ # `end` values.
34
+ #
35
+ # @param value [DPLA::MAP::TimeSpan, String, Object]
36
+ #
37
+ # @return [Object] a new `TimeSpan` object containing the providedLabel
38
+ # and the enriched begin/end; if given a value other than a `TimeSpan`
39
+ # or `String` returns that value.
31
40
  def enrich_value(value)
32
41
  value = timespan_from_string(value) if value.is_a? String
33
42
  return value unless value.is_a? DPLA::MAP::TimeSpan
34
43
  populate_timespan(value)
35
44
  end
36
45
 
46
+ ##
47
+ # Converts a string to a `DPLA::MAP::TimeSpan` with the string as
48
+ # `providedLabel`.
49
+ #
50
+ # @param value [String] a string value containing a date, time, or timespan
51
+ #
52
+ # @return [DPLA::MAP::TimeSpan] a new, empty timespan with `providedLabel`
37
53
  def timespan_from_string(value)
38
54
  timespan = DPLA::MAP::TimeSpan.new
39
55
  timespan.providedLabel = value
40
56
  timespan
41
57
  end
42
58
 
59
+ ##
60
+ # Populates a timespan with a begin and end date.
61
+ #
62
+ # @param timespan [DPLA::MAP::TimeSpan]
63
+ #
64
+ # @return [DPLA::MAP::TimeSpan]
43
65
  def populate_timespan(timespan)
44
66
  return timespan unless (timespan.begin.empty? || timespan.end.empty?) &&
45
67
  !timespan.providedLabel.empty?
@@ -55,17 +77,37 @@ module Krikri::Enrichments
55
77
  return timespan
56
78
  end
57
79
 
80
+ ##
81
+ # @return [Array<Date, EDTF::Interval>]
58
82
  def parse_labels(labels)
59
83
  labels.map { |l| Krikri::Util::ExtendedDateParser.parse(l, true) }.compact
60
84
  end
61
85
 
86
+ ##
87
+ # Converts an EDTF date to a begin and end date.
88
+ #
89
+ # @param date [Date, DateTime, EDTF::Interval] a date, with or without EDTF
90
+ # precision features; or an interval.
91
+ #
92
+ # @return [Array<Date, DateTime>] an array of two elements containing the
93
+ # begin and end dates.
62
94
  def span_from_date(date)
63
95
  return [nil, nil] if date.nil?
64
- return [date, date] if date.is_a? Date
96
+ if date.is_a?(Date)
97
+ return [date, date] if date.precision == :day
98
+ return [date, (date.succ - 1)]
99
+ end
65
100
  [(date.respond_to?(:first) ? date.first : date.from),
66
101
  (date.respond_to?(:last) ? date.last : date.to)]
67
102
  end
68
103
 
104
+ ##
105
+ # Reduces a timespan with multiple begin or end dates to a single earliest
106
+ # begin date and a single latest end date.
107
+ #
108
+ # @param timespan [DPLA::MAP::TimeSpan] the timespan to reduce
109
+ #
110
+ # @return [DPLA::MAP::TimeSpan] an updated timespan
69
111
  def reduce_to_largest_span(timespan)
70
112
  timespan.begin = timespan.begin.sort.first
71
113
  timespan.end = timespan.end.sort.last