krikri 0.3.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/krikri/records_controller.rb +14 -0
- data/app/models/krikri/search_index_document.rb +10 -0
- data/lib/krikri/engine.rb +1 -0
- data/lib/krikri/enricher.rb +156 -0
- data/lib/krikri/enrichment.rb +0 -2
- data/lib/krikri/enrichments/{deduplication.rb~ → dedup_values.rb~} +1 -4
- data/lib/krikri/enrichments/genre_filter.rb +45 -0
- data/lib/krikri/enrichments/strip_ending_punctuation.rb +21 -0
- data/lib/krikri/enrichments/strip_ending_punctuation.rb~ +18 -0
- data/lib/krikri/enrichments/strip_leading_colons.rb +15 -0
- data/lib/krikri/enrichments/strip_leading_colons.rb~ +15 -0
- data/lib/krikri/enrichments/strip_leading_punctuation.rb +18 -0
- data/lib/krikri/enrichments/strip_leading_punctuation.rb~ +18 -0
- data/lib/krikri/enrichments/timespan_split.rb +43 -1
- data/lib/krikri/harvesters/api_harvester.rb +164 -0
- data/lib/krikri/harvesters/api_harvester.rb~ +44 -0
- data/lib/krikri/harvesters/mdl_api_harvester.rb~ +7 -0
- data/lib/krikri/harvesters/mdl_harvester.rb~ +4 -0
- data/lib/krikri/harvesters/oai_harvester.rb +8 -0
- data/lib/krikri/ldp/rdf_source.rb +30 -0
- data/lib/krikri/mapper.rb +1 -3
- data/lib/krikri/provenance_query_client.rb +11 -1
- data/lib/krikri/util/extended_date_parser.rb +46 -11
- data/lib/krikri/version.rb +1 -1
- data/spec/internal/Gemfile.lock +12 -12
- data/spec/internal/config/initializers/blacklight_initializer.rb +1 -1
- data/spec/internal/config/initializers/devise.rb +2 -2
- data/spec/internal/config/secrets.yml +2 -2
- data/spec/internal/db/development.sqlite3 +0 -0
- data/spec/internal/db/migrate/{20150310190235_devise_create_users.rb → 20150320153132_devise_create_users.rb} +0 -0
- data/spec/internal/db/migrate/{20150310190253_create_searches.blacklight.rb → 20150320153151_create_searches.blacklight.rb} +0 -0
- data/spec/internal/db/migrate/{20150310190254_create_bookmarks.blacklight.rb → 20150320153152_create_bookmarks.blacklight.rb} +0 -0
- data/spec/internal/db/migrate/{20150310190255_add_polymorphic_type_to_bookmarks.blacklight.rb → 20150320153153_add_polymorphic_type_to_bookmarks.blacklight.rb} +0 -0
- data/spec/internal/db/schema.rb +1 -1
- data/spec/internal/db/test.sqlite3 +0 -0
- data/spec/internal/log/development.log +75 -73
- data/spec/internal/log/test.log +85076 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_alerts.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_background-variant.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_border-radius.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_buttons.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_center-block.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_clearfix.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_forms.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_gradients.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid-framework.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_hide-text.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_image.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_labels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_list-group.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-divider.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-vertical-align.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_opacity.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_pagination.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_panels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_progress-bar.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_reset-filter.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_resize.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_responsive-visibility.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_size.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_tab-focus.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_table-row.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-emphasis.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-overflow.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_vendor-prefixes.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/93e201cf4a11978a1f491a057a3bd569c3825210/blacklight.css.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_alerts.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_badges.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_breadcrumbs.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_button-groups.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_buttons.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_carousel.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_close.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_code.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_component-animations.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_dropdowns.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_forms.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_glyphicons.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_grid.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_input-groups.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_jumbotron.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_labels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_list-group.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_media.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_mixins.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_modals.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navbar.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navs.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_normalize.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pager.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pagination.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_panels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_popovers.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_print.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_progress-bars.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-embed.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-utilities.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_scaffolding.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tables.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_thumbnails.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tooltip.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_type.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_utilities.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_variables.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_wells.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap-sprockets.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_blacklight_base.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_bookmark.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_catalog.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_facets.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_group.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_header.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_layout.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_modal.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_search_history.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight_defaults.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/059eceaa18e2eaee3d5168f3949f4957 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/093d898bf3f20b26b13c82714a77c040 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/10517c9579f8d23c82fb8caa66dca6a7 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/13fe41fee1fe35b49d145bcc06610705 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/19f4ba6fa86d2609c171da9f5ee3b11a +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/2bec18569db80effd80a2a19a038100b +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/2f5173deea6c795b8fdde723bb4b63af +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/30ca7caa200fa8eaddaef443913ad9ad +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/350a65d446eb7398f96d102f7a1a6d69 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/357970feca3ac29060c1e3861e2c0953 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/3cc20a63495e66e9251101ccfb0d389c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/4052820c15af72ba690230a0f92bd75e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/41b62fb10530766b5b9fef3338f5f30b +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/421c89c58d3e76046a0294dae1f731ff +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/43da966542fafc2a4c768be78527391c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/496a0d7dce1ff6bf4a9c3a089ea3a635 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/4e93570db72bb61820711448b78d548d +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/50b9db0b908b421a9b941a445dbaeacc +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/5155772cce704d83a64aa83eeda20c5a +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/528c628cf107f8be6dd122e1154344be +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/5ed02c7072bb024fa0832a1ca4113227 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/60141dc3d16c502963c3d798a099862d +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/6df5b5c0a8e6d87ced9504a460df875e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/6eefd2765a716d66ff857cfcda69a119 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/813e23719129cf9f19349f720bebdf70 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/88ded10f28177bec1bddb0d72d6b1cd2 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/8ad5c3dc0f5fcc5d7a775d6294dda9f5 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/8edfca9082e02111be92e79000667f22 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/90b54a819800edfa41b67722d1561040 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/93aaa9231a32901266b05632f3d35ecd +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/9c653367feff82588eb6041d783a5809 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/9ce190f082dc4fb74bddb818cc011ce6 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/9f68eb44ffcb9dd95623c179300414fd +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ab2a05b91d7d316d4ceb47ce067006fb +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ad4a54c43c2a4c3874bfde1c9f08c248 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b1a42de74c934edd0e5eed0f98a23597 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b1c03c36188b4f58f819510adb9a7e9c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b225d67626669cb154cbe08a3c439d48 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b35e12934e9f05662777579549e31cd7 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b3ae9e3cdd9991d7103b0a00e33b5778 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b42c247628daaf44109584724682a6ad +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b660ec1ede271559cfd0259eed96ae9c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ba078b29a7f067b3acdd538a286235a8 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/bb108ef3fc4c96d1c20cc41f97d943a0 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/bb92f4b8c1bedafe60b6f226b549138a +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/c644ecad928e076f999e9c9a3bc350c9 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/c78dd20a0df27e9394a413bf2e4abf92 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/c9efce42580b0ad0374a052aa61f2a07 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/cffd775d018f68ce5dba1ee0d951a994 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/d1bce36d6e3ab792562b9c929f2ce897 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/d24a7c4b8ac71a90e809881a64970898 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/d771ace226fc8215a3572e0aa35bb0d6 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/db0d9534ecdebba33af5d1d81060e855 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/dbba4bbc32c17ade3d618c5d0baeb371 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/e8276b98892dcac8285a592bb63fe75b +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/e9f7ccc553ce1a217709cc7a08cfb032 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ecfc314951b349e27742c1b26880619e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/eddae0741d973a8ab3df27c1ea5b635e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f274b5f22db177b6464b50691d531688 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f3935581a84aba9a499005ed6a8e18be +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f3e7729c53b8a2c408086a61b58922ae +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f45dfdb34e84d3c72a3319399a7316a3 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f74af03e26e893af40b07a6d0f970cd4 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f7cbd26ba1d28d48de824f0e94586655 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/fa11f1beb15b0f04503ce1f06701a8bf +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/faac2909046aa87e1f1f08b2bf0ad2b2 +0 -0
- data/spec/lib/krikri/enricher_spec.rb +130 -0
- data/spec/lib/krikri/enrichments/dedup_values_spec.rb~ +10 -0
- data/spec/lib/krikri/enrichments/genre_filter_spec.rb +21 -0
- data/spec/lib/krikri/enrichments/remove_empty_fields_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb +28 -0
- data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb~ +16 -0
- data/spec/lib/krikri/enrichments/strip_html_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb +16 -0
- data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb~ +16 -0
- data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb +16 -0
- data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb~ +16 -0
- data/spec/lib/krikri/enrichments/strip_punctuation_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/strip_whitespace_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/timespan_split_spec.rb +15 -0
- data/spec/lib/krikri/harvesters/api_harvester_spec.rb +98 -0
- data/spec/lib/krikri/harvesters/api_harvester_spec.rb~ +47 -0
- data/spec/lib/krikri/harvesters/mdl_api_harvester_spec.rb~ +11 -0
- data/spec/lib/krikri/harvesters/mdl_harvester_spec.rb~ +8 -0
- data/spec/lib/krikri/harvesters/oai_harvester_spec.rb +12 -6
- data/spec/lib/krikri/job_spec.rb +40 -0
- data/spec/lib/krikri/mapper_agent_spec.rb +10 -6
- data/spec/lib/krikri/util/extended_date_parser_spec.rb +14 -2
- data/spec/models/search_index_document_spec.rb +8 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/support/shared_examples/harvester.rb +8 -1
- data/spec/support/shared_examples/rdf_source.rb +22 -0
- data/spec/support/shared_examples/string_enrichment.rb +13 -4
- metadata +365 -21
- data/lib/krikri/enrichments/capitalize.rb~ +0 -24
- data/lib/krikri/enrichments/genre_filter.rb~ +0 -9
- data/lib/krikri/enrichments/iso_enrcich.rb~ +0 -8
- data/lib/krikri/enrichments/iso_enrich.rb~ +0 -13
- data/lib/krikri/enrichments/parse_date.rb~ +0 -10
- data/lib/krikri/enrichments/strip_html.rb~ +0 -16
- data/lib/krikri/enrichments/timespan.rb~ +0 -6
- data/lib/krikri/enrichments/timespan_split.rb~ +0 -83
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ba2b247b82e5f0d8a3f52adb0e3242d41deff3b
|
4
|
+
data.tar.gz: 429288310bd3531d4fbb20881d7a17983c69a015
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 783c38573ddf5cac189f56220981aeba2768c1eba0a5e4d873d36642f4e633ca9e3ef449ba14ceae49d58b0bc05e426c9b5b84708f0ed7e9788ec305f3416464
|
7
|
+
data.tar.gz: 817df4eee63756335abc562d8badc40160d32d83a92aa5fde93e32ba17ca2938b2b311eed93cf8469756939f6551afd3b0bca25ca2b269f504d6b21b258457b8
|
@@ -80,5 +80,19 @@ module Krikri
|
|
80
80
|
|
81
81
|
config.solr_document_model = Krikri::SearchIndexDocument
|
82
82
|
end
|
83
|
+
|
84
|
+
##
|
85
|
+
# Construct a valid item URI from a local name, and use it to fetch a single
|
86
|
+
# document from the search index.
|
87
|
+
# Override method in Blacklight::SolrHelper.
|
88
|
+
# TODO: This method is deprecated in Blacklight v5.10.
|
89
|
+
# TODO: Write appropriate test for this functionality after it is updated
|
90
|
+
# with Blacklight v5.10.
|
91
|
+
# @param id [String] a local name.
|
92
|
+
def get_solr_response_for_doc_id(id=nil, extra_controller_params={})
|
93
|
+
id_uri = Krikri::Settings.marmotta.item_container << '/' << id
|
94
|
+
solr_response = solr_repository.find(id_uri, extra_controller_params)
|
95
|
+
[solr_response, solr_response.documents.first]
|
96
|
+
end
|
83
97
|
end
|
84
98
|
end
|
@@ -4,6 +4,16 @@ module Krikri
|
|
4
4
|
# Represents a single document returned from a query to the search index.
|
5
5
|
class SearchIndexDocument < SolrDocument
|
6
6
|
|
7
|
+
##
|
8
|
+
# Use local name instead of full item id URI in route. For example, a
|
9
|
+
# document with the id 'http://dp.la/marmotta/ldp/items/123ab' will have an
|
10
|
+
# id param of '123ab'. This is necessary because routes that contain '.'
|
11
|
+
# are not valid.
|
12
|
+
# @return [String]
|
13
|
+
def to_param
|
14
|
+
self[self.class.unique_key].match(/[\/]([^\/]*)\z/)[1]
|
15
|
+
end
|
16
|
+
|
7
17
|
##
|
8
18
|
# Get the aggregation, populated with data from Marmotta, which corresponds
|
9
19
|
# to this SearchIndexDocument
|
data/lib/krikri/engine.rb
CHANGED
@@ -0,0 +1,156 @@
|
|
1
|
+
module Krikri
|
2
|
+
##
|
3
|
+
# A SoftwareAgent that runs enrichment processes.
|
4
|
+
#
|
5
|
+
# @example
|
6
|
+
#
|
7
|
+
# To enrich records that were mapped by the mapping activity with ID 3:
|
8
|
+
#
|
9
|
+
# # Define which enrichments are run, and their parameters:
|
10
|
+
# chain = {
|
11
|
+
# 'Krikri::Enrichments::StripHtml' => {
|
12
|
+
# input_fields: [{sourceResource: :title}]
|
13
|
+
# },
|
14
|
+
# 'Krikri::Enrichments::StripWhitespace' => {
|
15
|
+
# input_fields: [{sourceResource: :title}]
|
16
|
+
# }
|
17
|
+
# }
|
18
|
+
# Krikri::Enricher.enqueue({
|
19
|
+
# generator_uri: 'http://ldp.local.dp.la/ldp/activity/3',
|
20
|
+
# chain: chain
|
21
|
+
# })
|
22
|
+
#
|
23
|
+
# @see Krikri::SoftwareAgent#enqueue
|
24
|
+
# @see Krikri::Enrichment
|
25
|
+
#
|
26
|
+
class Enricher
|
27
|
+
include SoftwareAgent
|
28
|
+
|
29
|
+
attr_reader :chain, :generator_uri
|
30
|
+
|
31
|
+
def self.queue_name
|
32
|
+
:enrichment
|
33
|
+
end
|
34
|
+
|
35
|
+
##
|
36
|
+
# Create a new Enricher, given a hash of options:
|
37
|
+
# generator_uri: the LDP URI of the Activity that generated the mapped
|
38
|
+
# records that this one will enrich.
|
39
|
+
# chain: a hash specifying the input_fields and output_fields, as
|
40
|
+
# illustrated above, which will be passed to the Enrichment.
|
41
|
+
#
|
42
|
+
# @see Krikri::Enrichment
|
43
|
+
# @param opts [Hash] a hash of options
|
44
|
+
def initialize(opts = {})
|
45
|
+
@generator_uri = RDF::URI(opts.fetch(:generator_uri))
|
46
|
+
@chain = deep_sym(opts.fetch(:chain) { {} })
|
47
|
+
end
|
48
|
+
|
49
|
+
##
|
50
|
+
# Run the enrichment.
|
51
|
+
#
|
52
|
+
# Take each record that was affected by the activity defined by our
|
53
|
+
# instantiation, and apply each enrichment from the enrichment chain.
|
54
|
+
#
|
55
|
+
def run(activity_uri = nil)
|
56
|
+
log :info, 'enricher is running'
|
57
|
+
# see TODO below
|
58
|
+
target_aggregations.each do |agg|
|
59
|
+
begin
|
60
|
+
chain_enrichments!(agg)
|
61
|
+
activity_uri ? agg.save_with_provenance(activity_uri) : agg.save
|
62
|
+
rescue => e
|
63
|
+
log :error, "Enrichment error: #{e.message}\n#{e.backtrace}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
log :info, 'enricher is done'
|
67
|
+
end
|
68
|
+
|
69
|
+
# TODO: remove this when the current topic branch that introduces the
|
70
|
+
# EntityConsumer mixin has been merged.
|
71
|
+
def target_aggregations
|
72
|
+
query = Krikri::ProvenanceQueryClient.find_by_activity(generator_uri)
|
73
|
+
query.execute.lazy.flat_map do |solution|
|
74
|
+
agg = DPLA::MAP::Aggregation.new(solution.record.to_s)
|
75
|
+
agg.get
|
76
|
+
agg
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
##
|
81
|
+
# Given an aggregation, take each enrichment specified by the `chain'
|
82
|
+
# given in our instantiation, and apply that enrichment, with the given
|
83
|
+
# options, modifying the aggregation in-place.
|
84
|
+
#
|
85
|
+
def chain_enrichments!(agg)
|
86
|
+
chain.keys.each do |e|
|
87
|
+
enrichment = e.to_s.constantize.new
|
88
|
+
if enrichment.is_a? Krikri::FieldEnrichment
|
89
|
+
agg = do_field_enrichment(agg, enrichment, chain[e])
|
90
|
+
else
|
91
|
+
agg = do_basic_enrichment(agg, enrichment, chain[e])
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
##
|
99
|
+
# Perform a default enrichment, using Enrichment#enrichment or a derived
|
100
|
+
# class that expects the same arguments.
|
101
|
+
#
|
102
|
+
# @param agg [DPLA::MAP::Aggregation]
|
103
|
+
# @param enrichment [Krikri::Enrichment]
|
104
|
+
# @param options [Hash]
|
105
|
+
#
|
106
|
+
# @see Krikri::Enrichment
|
107
|
+
#
|
108
|
+
def do_basic_enrichment(agg, enrichment, options)
|
109
|
+
enrichment.enrich(
|
110
|
+
agg, options[:input_fields], options[:output_fields]
|
111
|
+
)
|
112
|
+
end
|
113
|
+
|
114
|
+
##
|
115
|
+
# Perform a FieldEnrichment enrichment on the given aggregation.
|
116
|
+
#
|
117
|
+
# With FieldEnrichment#enrich, the input_fields option parameter is passed
|
118
|
+
# as a variable arguments list
|
119
|
+
#
|
120
|
+
# @param agg [DPLA::MAP::Aggregation]
|
121
|
+
# @param enrichment [Krikri::FieldEnrichment]
|
122
|
+
# @param options [Hash] Hash with :input_fields containing variable
|
123
|
+
# arguments list
|
124
|
+
#
|
125
|
+
# @see Krikri::FieldEnrichment
|
126
|
+
#
|
127
|
+
def do_field_enrichment(agg, enrichment, options)
|
128
|
+
enrichment.enrich(agg, *options[:input_fields])
|
129
|
+
end
|
130
|
+
|
131
|
+
##
|
132
|
+
# Transform the given hash recursively by turning all of its string keys
|
133
|
+
# and values into symbols.
|
134
|
+
#
|
135
|
+
# Symbols are expected in the enrichment classes, and we will usually be
|
136
|
+
# dealing with values that have been deserialized from JSON.
|
137
|
+
#
|
138
|
+
def deep_sym(obj)
|
139
|
+
if obj.is_a? Hash
|
140
|
+
return obj.inject({}) do |memo, (k, v)|
|
141
|
+
memo[k.to_sym] = deep_sym(v)
|
142
|
+
memo
|
143
|
+
end
|
144
|
+
elsif obj.is_a? Array
|
145
|
+
return obj.inject([]) do |memo, el|
|
146
|
+
memo << deep_sym(el)
|
147
|
+
memo
|
148
|
+
end
|
149
|
+
elsif obj.respond_to? :to_sym
|
150
|
+
return obj.to_sym
|
151
|
+
else
|
152
|
+
return nil
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
data/lib/krikri/enrichment.rb
CHANGED
@@ -3,8 +3,6 @@ module Krikri
|
|
3
3
|
# Mixin module for enriching a set of input_fields and setting the resulting
|
4
4
|
# values to a set of output fields.
|
5
5
|
module Enrichment
|
6
|
-
extend SoftwareAgent
|
7
|
-
|
8
6
|
##
|
9
7
|
# The main enrichment method; passes specified input fields to
|
10
8
|
# #enrich_values, which must return an array of values with length equal to
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Krikri::Enrichments
|
2
|
+
##
|
3
|
+
# Enrichment to remove values that are not allowed genre terms.
|
4
|
+
#
|
5
|
+
# GenreFilter.new.enrich_value('Book') => 'Book'
|
6
|
+
# GenreFilter.new.enrich_value('not a book') => nil
|
7
|
+
#
|
8
|
+
# Allowed genre terms are:
|
9
|
+
#
|
10
|
+
# - Book
|
11
|
+
# - Film/Video
|
12
|
+
# - Manuscript
|
13
|
+
# - Maps
|
14
|
+
# - Music
|
15
|
+
# - Musical Score
|
16
|
+
# - Newspapers
|
17
|
+
# - Nonmusic
|
18
|
+
# - Photograph/Pictorial Works
|
19
|
+
# - Serial
|
20
|
+
#
|
21
|
+
# Removes all non-string values
|
22
|
+
class GenreFilter
|
23
|
+
include Krikri::FieldEnrichment
|
24
|
+
|
25
|
+
TERMS = ['Book',
|
26
|
+
'Film/Video',
|
27
|
+
'Manuscript',
|
28
|
+
'Maps',
|
29
|
+
'Music',
|
30
|
+
'Musical Score',
|
31
|
+
'Newspapers',
|
32
|
+
'Nonmusic',
|
33
|
+
'Photograph/Pictorial Works',
|
34
|
+
'Serial']
|
35
|
+
|
36
|
+
def enrich_value(value)
|
37
|
+
return nil unless value.is_a? String
|
38
|
+
term = TERMS.select do |t|
|
39
|
+
t.downcase.gsub(/[^a-zA-Z]/, '') ==
|
40
|
+
value.downcase.gsub(/[^a-zA-Z]/, '')
|
41
|
+
end
|
42
|
+
term.empty? ? nil : term.first
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Krikri::Enrichments
|
2
|
+
##
|
3
|
+
# Strip ending punctuation
|
4
|
+
#
|
5
|
+
# StripEndingPunctuation.new
|
6
|
+
# .enrich_value("moomin!...!;,.")
|
7
|
+
# # => "moomin"
|
8
|
+
#
|
9
|
+
# Leaves quotation marks and closing parentheses & brackets. Also
|
10
|
+
# leaves periods when they follow a one or two letter abbreviation.
|
11
|
+
class StripEndingPunctuation
|
12
|
+
include Krikri::FieldEnrichment
|
13
|
+
|
14
|
+
def enrich_value(value)
|
15
|
+
return value unless value.is_a? String
|
16
|
+
value.gsub!(/[^\p{Alnum}\'\"\)\]\}]*$/, '') unless
|
17
|
+
value.match /\s*[a-z]{1,2}\.$/i
|
18
|
+
value
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Krikri::Enrichments
|
2
|
+
##
|
3
|
+
# Strip Ending punctuation
|
4
|
+
#
|
5
|
+
# StripPunctuation.new
|
6
|
+
# .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
|
7
|
+
# # => "\tmoominpapa;:;:; moominmama! ...\n"
|
8
|
+
#
|
9
|
+
# Leaves quotation marks.
|
10
|
+
class StripLeadingPunctuation
|
11
|
+
include Krikri::FieldEnrichment
|
12
|
+
|
13
|
+
def enrich_value(value)
|
14
|
+
return value unless value.is_a? String
|
15
|
+
value.gsub(/[^\p{Alnum}\'\"\s]*/, '')
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Krikri::Enrichments
|
2
|
+
##
|
3
|
+
# Strip leading colons
|
4
|
+
#
|
5
|
+
# StripLeadingColons.new.enrich_value(";:\tmoominpa()pa;;;")
|
6
|
+
# # => "\tmoominpa()pa;;;"
|
7
|
+
class StripLeadingColons
|
8
|
+
include Krikri::FieldEnrichment
|
9
|
+
|
10
|
+
def enrich_value(value)
|
11
|
+
return value unless value.is_a? String
|
12
|
+
value.gsub(/^[\;\:]*/, '')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Krikri::Enrichments
|
2
|
+
##
|
3
|
+
# Strip leading colons
|
4
|
+
#
|
5
|
+
# StripPunctuation.new.enrich_value(";:\tmoominpa()pa;;;")
|
6
|
+
# # => "\tmoominpa()pa;;;"
|
7
|
+
class StripLeadingColons
|
8
|
+
include Krikri::FieldEnrichment
|
9
|
+
|
10
|
+
def enrich_value(value)
|
11
|
+
return value unless value.is_a? String
|
12
|
+
value.gsub(/^[\;\:]*/, '')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Krikri::Enrichments
|
2
|
+
##
|
3
|
+
# Strip leading punctuation
|
4
|
+
#
|
5
|
+
# StripLeadingPunctuation.new
|
6
|
+
# .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
|
7
|
+
# # => "\tmoominpapa;:;:; moominmama! ...\n"
|
8
|
+
#
|
9
|
+
# Leaves quotation marks.
|
10
|
+
class StripLeadingPunctuation
|
11
|
+
include Krikri::FieldEnrichment
|
12
|
+
|
13
|
+
def enrich_value(value)
|
14
|
+
return value unless value.is_a? String
|
15
|
+
value.gsub(/^[^\p{Alnum}\'\"\s]*/, '')
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Krikri::Enrichments
|
2
|
+
##
|
3
|
+
# Strip leading punctuation
|
4
|
+
#
|
5
|
+
# StripPunctuation.new
|
6
|
+
# .enrich_value("([!.;:\tmoominpapa;:;:; moominmama! ...\n")
|
7
|
+
# # => "\tmoominpapa;:;:; moominmama! ...\n"
|
8
|
+
#
|
9
|
+
# Leaves quotation marks.
|
10
|
+
class StripLeadingPunctuation
|
11
|
+
include Krikri::FieldEnrichment
|
12
|
+
|
13
|
+
def enrich_value(value)
|
14
|
+
return value unless value.is_a? String
|
15
|
+
value.gsub(/^[^\p{Alnum}\'\"\s]*/, '')
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -28,18 +28,40 @@ module Krikri::Enrichments
|
|
28
28
|
class TimespanSplit
|
29
29
|
include Krikri::FieldEnrichment
|
30
30
|
|
31
|
+
##
|
32
|
+
# Enrich a `DPLA::MAP::TimeSpan` object or string value with `begin` and
|
33
|
+
# `end` values.
|
34
|
+
#
|
35
|
+
# @param value [DPLA::MAP::TimeSpan, String, Object]
|
36
|
+
#
|
37
|
+
# @return [Object] a new `TimeSpan` object containing the providedLabel
|
38
|
+
# and the enriched begin/end; if given a value other than a `TimeSpan`
|
39
|
+
# or `String` returns that value.
|
31
40
|
def enrich_value(value)
|
32
41
|
value = timespan_from_string(value) if value.is_a? String
|
33
42
|
return value unless value.is_a? DPLA::MAP::TimeSpan
|
34
43
|
populate_timespan(value)
|
35
44
|
end
|
36
45
|
|
46
|
+
##
|
47
|
+
# Converts a string to a `DPLA::MAP::TimeSpan` with the string as
|
48
|
+
# `providedLabel`.
|
49
|
+
#
|
50
|
+
# @param value [String] a string value containing a date, time, or timespan
|
51
|
+
#
|
52
|
+
# @return [DPLA::MAP::TimeSpan] a new, empty timespan with `providedLabel`
|
37
53
|
def timespan_from_string(value)
|
38
54
|
timespan = DPLA::MAP::TimeSpan.new
|
39
55
|
timespan.providedLabel = value
|
40
56
|
timespan
|
41
57
|
end
|
42
58
|
|
59
|
+
##
|
60
|
+
# Populates a timespan with a begin and end date.
|
61
|
+
#
|
62
|
+
# @param timespan [DPLA::MAP::TimeSpan]
|
63
|
+
#
|
64
|
+
# @return [DPLA::MAP::TimeSpan]
|
43
65
|
def populate_timespan(timespan)
|
44
66
|
return timespan unless (timespan.begin.empty? || timespan.end.empty?) &&
|
45
67
|
!timespan.providedLabel.empty?
|
@@ -55,17 +77,37 @@ module Krikri::Enrichments
|
|
55
77
|
return timespan
|
56
78
|
end
|
57
79
|
|
80
|
+
##
|
81
|
+
# @return [Array<Date, EDTF::Interval>]
|
58
82
|
def parse_labels(labels)
|
59
83
|
labels.map { |l| Krikri::Util::ExtendedDateParser.parse(l, true) }.compact
|
60
84
|
end
|
61
85
|
|
86
|
+
##
|
87
|
+
# Converts an EDTF date to a begin and end date.
|
88
|
+
#
|
89
|
+
# @param date [Date, DateTime, EDTF::Interval] a date, with or without EDTF
|
90
|
+
# precision features; or an interval.
|
91
|
+
#
|
92
|
+
# @return [Array<Date, DateTime>] an array of two elements containing the
|
93
|
+
# begin and end dates.
|
62
94
|
def span_from_date(date)
|
63
95
|
return [nil, nil] if date.nil?
|
64
|
-
|
96
|
+
if date.is_a?(Date)
|
97
|
+
return [date, date] if date.precision == :day
|
98
|
+
return [date, (date.succ - 1)]
|
99
|
+
end
|
65
100
|
[(date.respond_to?(:first) ? date.first : date.from),
|
66
101
|
(date.respond_to?(:last) ? date.last : date.to)]
|
67
102
|
end
|
68
103
|
|
104
|
+
##
|
105
|
+
# Reduces a timespan with multiple begin or end dates to a single earliest
|
106
|
+
# begin date and a single latest end date.
|
107
|
+
#
|
108
|
+
# @param timespan [DPLA::MAP::TimeSpan] the timespan to reduce
|
109
|
+
#
|
110
|
+
# @return [DPLA::MAP::TimeSpan] an updated timespan
|
69
111
|
def reduce_to_largest_span(timespan)
|
70
112
|
timespan.begin = timespan.begin.sort.first
|
71
113
|
timespan.end = timespan.end.sort.last
|