krikri 0.3.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/krikri/records_controller.rb +14 -0
- data/app/models/krikri/search_index_document.rb +10 -0
- data/lib/krikri/engine.rb +1 -0
- data/lib/krikri/enricher.rb +156 -0
- data/lib/krikri/enrichment.rb +0 -2
- data/lib/krikri/enrichments/{deduplication.rb~ → dedup_values.rb~} +1 -4
- data/lib/krikri/enrichments/genre_filter.rb +45 -0
- data/lib/krikri/enrichments/strip_ending_punctuation.rb +21 -0
- data/lib/krikri/enrichments/strip_ending_punctuation.rb~ +18 -0
- data/lib/krikri/enrichments/strip_leading_colons.rb +15 -0
- data/lib/krikri/enrichments/strip_leading_colons.rb~ +15 -0
- data/lib/krikri/enrichments/strip_leading_punctuation.rb +18 -0
- data/lib/krikri/enrichments/strip_leading_punctuation.rb~ +18 -0
- data/lib/krikri/enrichments/timespan_split.rb +43 -1
- data/lib/krikri/harvesters/api_harvester.rb +164 -0
- data/lib/krikri/harvesters/api_harvester.rb~ +44 -0
- data/lib/krikri/harvesters/mdl_api_harvester.rb~ +7 -0
- data/lib/krikri/harvesters/mdl_harvester.rb~ +4 -0
- data/lib/krikri/harvesters/oai_harvester.rb +8 -0
- data/lib/krikri/ldp/rdf_source.rb +30 -0
- data/lib/krikri/mapper.rb +1 -3
- data/lib/krikri/provenance_query_client.rb +11 -1
- data/lib/krikri/util/extended_date_parser.rb +46 -11
- data/lib/krikri/version.rb +1 -1
- data/spec/internal/Gemfile.lock +12 -12
- data/spec/internal/config/initializers/blacklight_initializer.rb +1 -1
- data/spec/internal/config/initializers/devise.rb +2 -2
- data/spec/internal/config/secrets.yml +2 -2
- data/spec/internal/db/development.sqlite3 +0 -0
- data/spec/internal/db/migrate/{20150310190235_devise_create_users.rb → 20150320153132_devise_create_users.rb} +0 -0
- data/spec/internal/db/migrate/{20150310190253_create_searches.blacklight.rb → 20150320153151_create_searches.blacklight.rb} +0 -0
- data/spec/internal/db/migrate/{20150310190254_create_bookmarks.blacklight.rb → 20150320153152_create_bookmarks.blacklight.rb} +0 -0
- data/spec/internal/db/migrate/{20150310190255_add_polymorphic_type_to_bookmarks.blacklight.rb → 20150320153153_add_polymorphic_type_to_bookmarks.blacklight.rb} +0 -0
- data/spec/internal/db/schema.rb +1 -1
- data/spec/internal/db/test.sqlite3 +0 -0
- data/spec/internal/log/development.log +75 -73
- data/spec/internal/log/test.log +85076 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_alerts.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_background-variant.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_border-radius.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_buttons.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_center-block.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_clearfix.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_forms.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_gradients.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid-framework.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_grid.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_hide-text.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_image.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_labels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_list-group.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-divider.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_nav-vertical-align.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_opacity.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_pagination.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_panels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_progress-bar.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_reset-filter.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_resize.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_responsive-visibility.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_size.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_tab-focus.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_table-row.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-emphasis.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_text-overflow.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/4a3ee647961c7e45976eb2c0a94406aad3427b3d/_vendor-prefixes.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/93e201cf4a11978a1f491a057a3bd569c3825210/blacklight.css.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_alerts.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_badges.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_breadcrumbs.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_button-groups.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_buttons.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_carousel.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_close.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_code.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_component-animations.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_dropdowns.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_forms.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_glyphicons.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_grid.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_input-groups.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_jumbotron.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_labels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_list-group.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_media.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_mixins.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_modals.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navbar.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_navs.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_normalize.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pager.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_pagination.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_panels.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_popovers.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_print.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_progress-bars.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-embed.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_responsive-utilities.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_scaffolding.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tables.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_thumbnails.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_tooltip.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_type.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_utilities.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_variables.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/a1ec1bb9c9cafeb054d542e861ebc8ffd5904439/_wells.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap-sprockets.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/b28605b1c659cf09fc72f3c1fff32918869d28b8/_bootstrap.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_blacklight_base.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_bookmark.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_catalog.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_facets.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_group.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_header.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_layout.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_modal.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/_search_history.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sass/ca8c12d03785e0d6cd4554f4d3939e7836d38282/blacklight_defaults.scssc +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/059eceaa18e2eaee3d5168f3949f4957 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/093d898bf3f20b26b13c82714a77c040 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/10517c9579f8d23c82fb8caa66dca6a7 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/13fe41fee1fe35b49d145bcc06610705 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/19f4ba6fa86d2609c171da9f5ee3b11a +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/2bec18569db80effd80a2a19a038100b +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/2f5173deea6c795b8fdde723bb4b63af +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/30ca7caa200fa8eaddaef443913ad9ad +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/350a65d446eb7398f96d102f7a1a6d69 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/357970feca3ac29060c1e3861e2c0953 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/3cc20a63495e66e9251101ccfb0d389c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/4052820c15af72ba690230a0f92bd75e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/41b62fb10530766b5b9fef3338f5f30b +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/421c89c58d3e76046a0294dae1f731ff +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/43da966542fafc2a4c768be78527391c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/496a0d7dce1ff6bf4a9c3a089ea3a635 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/4e93570db72bb61820711448b78d548d +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/50b9db0b908b421a9b941a445dbaeacc +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/5155772cce704d83a64aa83eeda20c5a +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/528c628cf107f8be6dd122e1154344be +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/5ed02c7072bb024fa0832a1ca4113227 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/60141dc3d16c502963c3d798a099862d +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/6df5b5c0a8e6d87ced9504a460df875e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/6eefd2765a716d66ff857cfcda69a119 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/813e23719129cf9f19349f720bebdf70 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/88ded10f28177bec1bddb0d72d6b1cd2 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/8ad5c3dc0f5fcc5d7a775d6294dda9f5 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/8edfca9082e02111be92e79000667f22 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/90b54a819800edfa41b67722d1561040 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/93aaa9231a32901266b05632f3d35ecd +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/9c653367feff82588eb6041d783a5809 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/9ce190f082dc4fb74bddb818cc011ce6 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/9f68eb44ffcb9dd95623c179300414fd +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ab2a05b91d7d316d4ceb47ce067006fb +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ad4a54c43c2a4c3874bfde1c9f08c248 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b1a42de74c934edd0e5eed0f98a23597 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b1c03c36188b4f58f819510adb9a7e9c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b225d67626669cb154cbe08a3c439d48 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b35e12934e9f05662777579549e31cd7 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b3ae9e3cdd9991d7103b0a00e33b5778 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b42c247628daaf44109584724682a6ad +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/b660ec1ede271559cfd0259eed96ae9c +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ba078b29a7f067b3acdd538a286235a8 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/bb108ef3fc4c96d1c20cc41f97d943a0 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/bb92f4b8c1bedafe60b6f226b549138a +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/c644ecad928e076f999e9c9a3bc350c9 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/c78dd20a0df27e9394a413bf2e4abf92 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/c9efce42580b0ad0374a052aa61f2a07 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/cffd775d018f68ce5dba1ee0d951a994 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/d1bce36d6e3ab792562b9c929f2ce897 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/d24a7c4b8ac71a90e809881a64970898 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/d771ace226fc8215a3572e0aa35bb0d6 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/db0d9534ecdebba33af5d1d81060e855 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/dbba4bbc32c17ade3d618c5d0baeb371 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/e8276b98892dcac8285a592bb63fe75b +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/e9f7ccc553ce1a217709cc7a08cfb032 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/ecfc314951b349e27742c1b26880619e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/eddae0741d973a8ab3df27c1ea5b635e +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f274b5f22db177b6464b50691d531688 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f3935581a84aba9a499005ed6a8e18be +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f3e7729c53b8a2c408086a61b58922ae +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f45dfdb34e84d3c72a3319399a7316a3 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f74af03e26e893af40b07a6d0f970cd4 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/f7cbd26ba1d28d48de824f0e94586655 +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/fa11f1beb15b0f04503ce1f06701a8bf +0 -0
- data/spec/internal/tmp/cache/assets/test/sprockets/faac2909046aa87e1f1f08b2bf0ad2b2 +0 -0
- data/spec/lib/krikri/enricher_spec.rb +130 -0
- data/spec/lib/krikri/enrichments/dedup_values_spec.rb~ +10 -0
- data/spec/lib/krikri/enrichments/genre_filter_spec.rb +21 -0
- data/spec/lib/krikri/enrichments/remove_empty_fields_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb +28 -0
- data/spec/lib/krikri/enrichments/strip_ending_punctuation_spec.rb~ +16 -0
- data/spec/lib/krikri/enrichments/strip_html_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb +16 -0
- data/spec/lib/krikri/enrichments/strip_leading_colons_spec.rb~ +16 -0
- data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb +16 -0
- data/spec/lib/krikri/enrichments/strip_leading_punctuation_spec.rb~ +16 -0
- data/spec/lib/krikri/enrichments/strip_punctuation_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/strip_whitespace_spec.rb +1 -0
- data/spec/lib/krikri/enrichments/timespan_split_spec.rb +15 -0
- data/spec/lib/krikri/harvesters/api_harvester_spec.rb +98 -0
- data/spec/lib/krikri/harvesters/api_harvester_spec.rb~ +47 -0
- data/spec/lib/krikri/harvesters/mdl_api_harvester_spec.rb~ +11 -0
- data/spec/lib/krikri/harvesters/mdl_harvester_spec.rb~ +8 -0
- data/spec/lib/krikri/harvesters/oai_harvester_spec.rb +12 -6
- data/spec/lib/krikri/job_spec.rb +40 -0
- data/spec/lib/krikri/mapper_agent_spec.rb +10 -6
- data/spec/lib/krikri/util/extended_date_parser_spec.rb +14 -2
- data/spec/models/search_index_document_spec.rb +8 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/support/shared_examples/harvester.rb +8 -1
- data/spec/support/shared_examples/rdf_source.rb +22 -0
- data/spec/support/shared_examples/string_enrichment.rb +13 -4
- metadata +365 -21
- data/lib/krikri/enrichments/capitalize.rb~ +0 -24
- data/lib/krikri/enrichments/genre_filter.rb~ +0 -9
- data/lib/krikri/enrichments/iso_enrcich.rb~ +0 -8
- data/lib/krikri/enrichments/iso_enrich.rb~ +0 -13
- data/lib/krikri/enrichments/parse_date.rb~ +0 -10
- data/lib/krikri/enrichments/strip_html.rb~ +0 -16
- data/lib/krikri/enrichments/timespan.rb~ +0 -6
- data/lib/krikri/enrichments/timespan_split.rb~ +0 -83
@@ -0,0 +1,164 @@
|
|
1
|
+
module Krikri::Harvesters
|
2
|
+
##
|
3
|
+
# A harvester implementation for REST APIs. The default ApiHarvester expects
|
4
|
+
# Solr-like JSON responses/records.
|
5
|
+
#
|
6
|
+
# An internal interface is provided for easier subclassing. A new API
|
7
|
+
# harvester may reimplement:
|
8
|
+
# - #get_docs (to retrieve record docs from a response)
|
9
|
+
# - #get_count (to determine total record count from a response)
|
10
|
+
# - #get_identifier (to retrieve an identifier from a record document)
|
11
|
+
# - #get_content (to retrieve a content string from a record document)
|
12
|
+
# - #next_options (to generate the parameters for the next request)
|
13
|
+
#
|
14
|
+
# If the content type of the records is other than JSON, you will also want
|
15
|
+
# to override `#content_type`.
|
16
|
+
class ApiHarvester
|
17
|
+
include Krikri::Harvester
|
18
|
+
|
19
|
+
attr_reader :opts
|
20
|
+
|
21
|
+
##
|
22
|
+
# @param opts [Hash] options for the harvester
|
23
|
+
# @see .expected_opts
|
24
|
+
def initialize(opts = {})
|
25
|
+
super
|
26
|
+
@opts = opts.fetch(:api, {})
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# @return [Hash] A hash documenting the allowable options to pass to
|
31
|
+
# initializers.
|
32
|
+
#
|
33
|
+
# @see Krikri::Harvester::expected_opts
|
34
|
+
def self.expected_opts
|
35
|
+
{
|
36
|
+
key: :api,
|
37
|
+
opts: {
|
38
|
+
params: { type: :string, required: false }
|
39
|
+
}
|
40
|
+
}
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# @see Krikri::Harvester#count
|
45
|
+
def count
|
46
|
+
get_count(request(opts))
|
47
|
+
end
|
48
|
+
|
49
|
+
##
|
50
|
+
# @return [Enumerator::Lazy] an enumerator of the records targeted by this
|
51
|
+
# harvester.
|
52
|
+
def records
|
53
|
+
enumerate_records.lazy.map { |rec| build_record(rec) }
|
54
|
+
end
|
55
|
+
|
56
|
+
##
|
57
|
+
# Lazily enumerates the identifiers of the records available from the API
|
58
|
+
#
|
59
|
+
# @return [Enumerator::Lazy] an enumerator over the ids for the records
|
60
|
+
# targeted by this harvester.
|
61
|
+
def record_ids
|
62
|
+
enumerate_records.lazy.map { |r| get_identifier(r) }
|
63
|
+
end
|
64
|
+
|
65
|
+
##
|
66
|
+
# @param identifier [#to_s] the identifier of the record to get
|
67
|
+
# @return [#to_s] the record
|
68
|
+
def get_record(identifier)
|
69
|
+
response = request(:params => { :q => "id:#{identifier.to_s}" })
|
70
|
+
build_record(get_docs(response).first)
|
71
|
+
end
|
72
|
+
|
73
|
+
##
|
74
|
+
# @return [String] the content type for the records generated by this
|
75
|
+
# harvester
|
76
|
+
def content_type
|
77
|
+
'application/json'
|
78
|
+
end
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
##
|
83
|
+
# @param doc [#to_s] a raw record document with an identifier
|
84
|
+
#
|
85
|
+
# @return [String] the provider's identifier for the document
|
86
|
+
def get_identifier(doc)
|
87
|
+
doc['record_id']
|
88
|
+
end
|
89
|
+
|
90
|
+
##
|
91
|
+
# @param response [#to_s] a response from the REST API
|
92
|
+
#
|
93
|
+
# @return [Integer] a count of the total records found by the request
|
94
|
+
def get_count(response)
|
95
|
+
response['response']['numFound']
|
96
|
+
end
|
97
|
+
|
98
|
+
##
|
99
|
+
# @param response [#to_s] a response from the REST API
|
100
|
+
#
|
101
|
+
# @return [Array] an array of record documents from the response
|
102
|
+
def get_docs(response)
|
103
|
+
response['response']['docs']
|
104
|
+
end
|
105
|
+
|
106
|
+
##
|
107
|
+
# @param doc [#to_s] a raw record document
|
108
|
+
#
|
109
|
+
# @return [String] the record content
|
110
|
+
def get_content(doc)
|
111
|
+
doc.to_json
|
112
|
+
end
|
113
|
+
|
114
|
+
##
|
115
|
+
# Send a request via `RestClient`, and parse the result as JSON
|
116
|
+
def request(request_opts)
|
117
|
+
JSON.parse(RestClient.get(uri, request_opts))
|
118
|
+
end
|
119
|
+
|
120
|
+
##
|
121
|
+
# Given a current set of options and a number of records from the last
|
122
|
+
# request, generate the options for the next request.
|
123
|
+
#
|
124
|
+
# @param opts [Hash] an options hash from the previous request
|
125
|
+
# @param record_count [#to_i]
|
126
|
+
#
|
127
|
+
# @return [Hash] the next request's options hash
|
128
|
+
def next_options(opts, record_count)
|
129
|
+
old_start = opts['params'].fetch('start', 0)
|
130
|
+
opts['params']['start'] = old_start.to_i + record_count
|
131
|
+
opts
|
132
|
+
end
|
133
|
+
|
134
|
+
##
|
135
|
+
# @return [Enumerator] an enumerator over the records
|
136
|
+
def enumerate_records
|
137
|
+
Enumerator.new do |yielder|
|
138
|
+
request_opts = opts.deep_dup
|
139
|
+
loop do
|
140
|
+
break if request_opts.nil?
|
141
|
+
docs = get_docs(request(request_opts.dup))
|
142
|
+
break if docs.empty?
|
143
|
+
|
144
|
+
docs.each { |r| yielder << r }
|
145
|
+
|
146
|
+
request_opts = next_options(request_opts, docs.count)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
##
|
152
|
+
# Builds an instance of `@record_class` with the given doc's JSON as
|
153
|
+
# content.
|
154
|
+
#
|
155
|
+
# @param doc [#to_json] the content to serialize as JSON in `#content`
|
156
|
+
# @return [#to_s] an instance of @record_class with a minted id and
|
157
|
+
# content the given content
|
158
|
+
def build_record(doc)
|
159
|
+
@record_class.build(mint_id(get_identifier(doc)),
|
160
|
+
get_content(doc),
|
161
|
+
content_type)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Krikri::Harvesters
|
2
|
+
##
|
3
|
+
# A harvester implementation for REST APIs
|
4
|
+
class ApiHarvester
|
5
|
+
include Krikri::Harvester
|
6
|
+
|
7
|
+
def initialize(opts = {})
|
8
|
+
super
|
9
|
+
@opts = opts.fetch(:api, {})
|
10
|
+
end
|
11
|
+
|
12
|
+
##
|
13
|
+
# @return [Hash] A hash documenting the allowable options to pass to
|
14
|
+
# initializers.
|
15
|
+
#
|
16
|
+
# @see Krikri::Harvester::expected_opts
|
17
|
+
def self.expected_opts
|
18
|
+
{
|
19
|
+
key: :api,
|
20
|
+
opts: {
|
21
|
+
params: { type: :string, required: false }
|
22
|
+
}
|
23
|
+
}
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# @see Krikri::Harvester#count
|
28
|
+
def count
|
29
|
+
JSON.parse(request(opts['params']))['response']['numFound']
|
30
|
+
end
|
31
|
+
|
32
|
+
##
|
33
|
+
# @return [Enumerator::Lazy] an enumerator of the records targeted by this
|
34
|
+
# harvester.
|
35
|
+
def records
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def request(opts)
|
41
|
+
RestClient.get uri, opts
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -156,6 +156,14 @@ module Krikri::Harvesters
|
|
156
156
|
##
|
157
157
|
# Runs the request in the given block against the sets specified in `opts`.
|
158
158
|
# Results are concatenated into a single enumerator
|
159
|
+
#
|
160
|
+
# @param opts [Hash] the options to pass, including all sets to process.
|
161
|
+
# @yield [set_opts] gives options to the block once for each set. The
|
162
|
+
# block should run the harvest action with the options and give an
|
163
|
+
# Enumerable.
|
164
|
+
#
|
165
|
+
# @return [Enumerator::Lazy] A lazy enumerator concatenating the results
|
166
|
+
# of the block with each set.
|
159
167
|
def request_with_sets(opts, &block)
|
160
168
|
sets = Array(opts.delete(:set))
|
161
169
|
if opts[:skip_set]
|
@@ -2,6 +2,8 @@ module Krikri::LDP
|
|
2
2
|
##
|
3
3
|
# Adds simple LDP persistence to ActiveTriples::Resource classes
|
4
4
|
# @see ActiveTriples::Resource
|
5
|
+
#
|
6
|
+
# @see http://www.w3.org/TR/ldp/#ldprs
|
5
7
|
module RdfSource
|
6
8
|
extend ActiveSupport::Concern
|
7
9
|
include Krikri::LDP::Resource
|
@@ -30,6 +32,34 @@ module Krikri::LDP
|
|
30
32
|
result
|
31
33
|
end
|
32
34
|
|
35
|
+
##
|
36
|
+
# Adds an appropriate provenance statement with the given URI and saves
|
37
|
+
# the resource.
|
38
|
+
#
|
39
|
+
# This method treats RDFSources as stateful resources. This is in conflict
|
40
|
+
# with the PROV model, which assumes each revision is its own Resource. The
|
41
|
+
# internal predicate `dpla:wasRevisedBy` is used for non-generating
|
42
|
+
# revisions of stateful RDFSources.
|
43
|
+
#
|
44
|
+
# @todo Assuming a Marmotta LDP server, there are version URIs available
|
45
|
+
# (via Memento) which could be used for a direct PROV implementation.
|
46
|
+
# Consider options for doing that either alongside or in place of this
|
47
|
+
# approach.
|
48
|
+
#
|
49
|
+
# @param activity_uri [#to_term] the URI of the prov:Activity to mark as
|
50
|
+
# generating or revising the saved resource.
|
51
|
+
#
|
52
|
+
# @see #save
|
53
|
+
#
|
54
|
+
# @see http://www.w3.org/TR/prov-primer/
|
55
|
+
# @see http://www.w3.org/TR/2013/REC-prov-o-20130430/
|
56
|
+
def save_with_provenance(activity_uri)
|
57
|
+
predicate =
|
58
|
+
exists? ? RDF::DPLA.wasRevisedBy : RDF::PROV.wasGeneratedBy
|
59
|
+
self << RDF::Statement(self, predicate, activity_uri)
|
60
|
+
save
|
61
|
+
end
|
62
|
+
|
33
63
|
private
|
34
64
|
|
35
65
|
##
|
data/lib/krikri/mapper.rb
CHANGED
@@ -109,9 +109,7 @@ module Krikri
|
|
109
109
|
Krikri::Mapper.map(name, records).each do |rec|
|
110
110
|
begin
|
111
111
|
rec.mint_id! if rec.node?
|
112
|
-
|
113
|
-
activity_uri
|
114
|
-
rec.save
|
112
|
+
activity_uri ? rec.save_with_provenance(activity_uri) : rec.save
|
115
113
|
rescue => e
|
116
114
|
Rails.logger.error("Error saving record: #{rec.rdf_subject}\n" \
|
117
115
|
"#{e.message}\n#{e.backtrace}")
|
@@ -6,11 +6,21 @@ module Krikri
|
|
6
6
|
|
7
7
|
module_function
|
8
8
|
|
9
|
+
##
|
10
|
+
# Finds all entities generated or revised by the activity whose URI is
|
11
|
+
# given.
|
12
|
+
#
|
13
|
+
# @param activity_uri [#to_uri] the URI of the activity to search
|
14
|
+
#
|
15
|
+
# @return [RDF::SPARQL::Query] a query object that, when executed, will
|
16
|
+
# give solutions containing the URIs for the resources in `#record`.
|
9
17
|
def find_by_activity(activity_uri)
|
10
18
|
raise ArgumentError, 'activity_uri must be an RDF::URI' unless
|
11
19
|
activity_uri.respond_to? :to_uri
|
12
20
|
SPARQL_CLIENT.select(:record)
|
13
|
-
.where([:record,
|
21
|
+
.where([:record,
|
22
|
+
[RDF::PROV.wasGeneratedBy, '|', RDF::DPLA.wasRevisedBy],
|
23
|
+
activity_uri])
|
14
24
|
end
|
15
25
|
end
|
16
26
|
end
|
@@ -1,4 +1,6 @@
|
|
1
1
|
module Krikri::Util
|
2
|
+
##
|
3
|
+
# Utilities to parse string values into EDTF dates or Intervals.
|
2
4
|
module ExtendedDateParser
|
3
5
|
module_function
|
4
6
|
|
@@ -20,18 +22,21 @@ module Krikri::Util
|
|
20
22
|
# #range_match to look for range values.
|
21
23
|
#
|
22
24
|
# @return [Date, EDTF::Epoch, EDTF::Interval, nil] the date parsed or nil
|
25
|
+
# @see http://www.loc.gov/standards/datetime/
|
23
26
|
def parse(date_str, allow_interval = false)
|
24
|
-
date_str.
|
25
|
-
|
26
|
-
date
|
27
|
-
date ||=
|
28
|
-
date ||=
|
29
|
-
date ||=
|
30
|
-
date ||=
|
31
|
-
date ||=
|
32
|
-
date ||=
|
33
|
-
date ||=
|
34
|
-
|
27
|
+
str = preprocess(date_str.dup)
|
28
|
+
date = parse_interval(str) if allow_interval
|
29
|
+
date ||= parse_m_d_y(str)
|
30
|
+
date ||= Date.edtf(str.gsub('.', '-'))
|
31
|
+
date ||= partial_edtf(str)
|
32
|
+
date ||= decade_hyphen(str)
|
33
|
+
date ||= month_year(str)
|
34
|
+
date ||= decade_s(str)
|
35
|
+
date ||= hyphenated_partial_range(str)
|
36
|
+
date ||= parse_date(str)
|
37
|
+
# Only do this if certain letters are present to avoid infinite loops.
|
38
|
+
date ||= circa(str) if str.match(/[circabout]/i)
|
39
|
+
date = date.first if date.is_a? EDTF::Set
|
35
40
|
date || nil
|
36
41
|
end
|
37
42
|
|
@@ -55,6 +60,36 @@ module Krikri::Util
|
|
55
60
|
end
|
56
61
|
end
|
57
62
|
|
63
|
+
##
|
64
|
+
# Preprocess the date string to remove extra whitespace and convert ad hoc
|
65
|
+
# formatting to equivalent EDTF.
|
66
|
+
#
|
67
|
+
# @todo should '-' be interpreted as 'x' or '?'
|
68
|
+
# @see http://www.loc.gov/standards/datetime/pre-submission.html#maskedprecision
|
69
|
+
def preprocess(str)
|
70
|
+
str.gsub!(/late/i, '')
|
71
|
+
str.gsub!(/early/i, '')
|
72
|
+
str.strip!
|
73
|
+
str.gsub!(/\s+/, ' ')
|
74
|
+
str.gsub!('0s', 'x') if str.match(/^[1-9]+0s$/)
|
75
|
+
str.gsub!('-', 'x') if str.match(/^[1-9]+\-+$/)
|
76
|
+
str
|
77
|
+
end
|
78
|
+
|
79
|
+
##
|
80
|
+
# Remove 'circa' or 'about' or variations and return uncertain EDTF
|
81
|
+
# dates.
|
82
|
+
#
|
83
|
+
# @param str [String]
|
84
|
+
# @return [Date, nil] an EDTF date, marked uncertain; or `nil`
|
85
|
+
# @see #parse
|
86
|
+
def circa(str)
|
87
|
+
run = str.gsub!(/.*c[irca\.]*/i, '')
|
88
|
+
run ||= str.gsub!(/.*about/i, '')
|
89
|
+
date = parse(str) if run
|
90
|
+
date.nil? ? nil : date.uncertain!
|
91
|
+
end
|
92
|
+
|
58
93
|
##
|
59
94
|
# Creates an EDTF::Interval from a string
|
60
95
|
#
|
data/lib/krikri/version.rb
CHANGED
data/spec/internal/Gemfile.lock
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: /home/tjohnson/src/dpla/krikri
|
3
3
|
specs:
|
4
|
-
krikri (0.3.
|
4
|
+
krikri (0.3.3)
|
5
5
|
analysand (= 4.0.0)
|
6
6
|
blacklight (~> 5.8.0)
|
7
7
|
devise (~> 3.4.1)
|
8
|
-
dpla-map (= 4.0.0.0.pre.
|
8
|
+
dpla-map (= 4.0.0.0.pre.10)
|
9
9
|
edtf
|
10
10
|
jsonpath
|
11
11
|
oai
|
@@ -61,7 +61,7 @@ GEM
|
|
61
61
|
rack
|
62
62
|
yajl-ruby
|
63
63
|
arel (5.0.1.20140414130214)
|
64
|
-
autoprefixer-rails (5.1.7)
|
64
|
+
autoprefixer-rails (5.1.7.1)
|
65
65
|
execjs
|
66
66
|
json
|
67
67
|
bcrypt (3.1.10)
|
@@ -72,7 +72,7 @@ GEM
|
|
72
72
|
nokogiri (~> 1.6)
|
73
73
|
rails (>= 3.2.6, < 5)
|
74
74
|
rsolr (~> 1.0.6)
|
75
|
-
bootstrap-sass (3.3.
|
75
|
+
bootstrap-sass (3.3.4.1)
|
76
76
|
autoprefixer-rails (>= 5.0.0.1)
|
77
77
|
sass (>= 3.2.19)
|
78
78
|
builder (3.2.2)
|
@@ -100,10 +100,10 @@ GEM
|
|
100
100
|
responders
|
101
101
|
thread_safe (~> 0.1)
|
102
102
|
warden (~> 1.2.3)
|
103
|
-
dpla-map (4.0.0.0.pre.
|
104
|
-
active-triples (~> 0.
|
105
|
-
linked_vocabs (~> 0.
|
106
|
-
ebnf (0.3.
|
103
|
+
dpla-map (4.0.0.0.pre.10)
|
104
|
+
active-triples (~> 0.6.0)
|
105
|
+
linked_vocabs (~> 0.2.0)
|
106
|
+
ebnf (0.3.7)
|
107
107
|
haml (~> 4.0)
|
108
108
|
rdf (~> 1.1)
|
109
109
|
sxp (~> 0.1, >= 0.1.3)
|
@@ -130,7 +130,7 @@ GEM
|
|
130
130
|
htmlentities (4.3.3)
|
131
131
|
http_parser.rb (0.6.0)
|
132
132
|
i18n (0.7.0)
|
133
|
-
jbuilder (2.2.
|
133
|
+
jbuilder (2.2.12)
|
134
134
|
activesupport (>= 3.0.0, < 5)
|
135
135
|
multi_json (~> 1.2)
|
136
136
|
jettywrapper (2.0.3)
|
@@ -153,8 +153,8 @@ GEM
|
|
153
153
|
activesupport (>= 3.0.0)
|
154
154
|
libv8 (3.16.14.7)
|
155
155
|
link_header (0.0.8)
|
156
|
-
linked_vocabs (0.
|
157
|
-
active-triples (>= 0.
|
156
|
+
linked_vocabs (0.2.0)
|
157
|
+
active-triples (>= 0.6.0)
|
158
158
|
rake
|
159
159
|
rdf (>= 1.1.2.1)
|
160
160
|
sparql
|
@@ -329,7 +329,7 @@ GEM
|
|
329
329
|
libv8 (~> 3.16.14.0)
|
330
330
|
ref
|
331
331
|
thor (0.19.1)
|
332
|
-
thread_safe (0.3.
|
332
|
+
thread_safe (0.3.5)
|
333
333
|
tilt (1.4.1)
|
334
334
|
timers (4.0.1)
|
335
335
|
hitimes
|