traject_profiling 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7e0af0cc816155f0d24efd1e61b3291aff736b9a
4
+ data.tar.gz: 339785fe91fe7114c85627ee48935c9e1e366904
5
+ SHA512:
6
+ metadata.gz: 8eb43cb020eadf138e87d2dbeedfff53f086507d87060dd0c31649328050979c45903163412ae3bd83a4921cdb5e0fbe6d999018feccbf6a32fa862b3fdfa1fb
7
+ data.tar.gz: f1d6e12641c74351ce0ffec992afc921b8e9120c9666f9f0c1421fa1dbdb2560d63dbf8f893994ece7f0bf957beacc53750c5e64dbca777f8ca782125f023477
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,19 @@
1
+ language: ruby
2
+ script: rake ci
3
+ rvm:
4
+ - ruby-head
5
+ - 2.2.3
6
+ - 2.1.5 # developing here, for now
7
+ - jruby-9.0.0.0
8
+ - jruby-1.7.20
9
+ matrix:
10
+ allow_failures:
11
+ - rvm: ruby-head
12
+ fast_finish: true
13
+ env:
14
+ global:
15
+ - NOKOGIRI_USE_SYSTEM_LIBRARIES=true
16
+ notifications:
17
+ email:
18
+ - ndushay@stanford.edu
19
+ sudo: false
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in traject_profiling.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,12 @@
1
+ Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+ http://www.apache.org/licenses/LICENSE-2.0
7
+
8
+ Unless required by applicable law or agreed to in writing, software
9
+ distributed under the License is distributed on an "AS IS" BASIS,
10
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ See the License for the specific language governing permissions and
12
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # traject_profiling
2
+
3
+ Traject macros to provide profiling information on MARC bibliographic records.
4
+
5
+ This code is meant to be used with [traject](http://github.com/traject/traject) to index MARC records into [Solr](http://lucene.apache.org/solr).
6
+
7
+ ## Usage
8
+
9
+ ### A sample traject configuration file using macros from traject_profiling
10
+
11
+ ```ruby
12
+ require 'traject'
13
+ require 'traject/profiling'
14
+ extend Traject::Profiling::Macros
15
+
16
+ to_field 'id', extract_marc('001', :first=>true)
17
+ to_field 'f700count', field_count('700')
18
+ to_field 'f700ind1', field_ind_vals('700', '1')
19
+ to_field 'f700ind2', field_ind_vals('700', '2')
20
+ to_field 'f700subflds', field_subfield_codes('700')
21
+ to_field 'f880_for', f880_tags
22
+ to_field 'f880_for', f880_tags_and_subfields
23
+
24
+ ```
25
+
26
+ ## Installation
27
+
28
+ Add this line to your application's Gemfile:
29
+
30
+ ```ruby
31
+ gem 'traject_profiling'
32
+ ```
33
+
34
+ And then execute:
35
+
36
+ $ bundle
37
+
38
+ Or install it yourself as:
39
+
40
+ $ gem install traject_profiling
41
+
42
+ ## Contributing
43
+
44
+ 1. Fork it ( https://github.com/[my-github-username]/traject_profiling/fork )
45
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
46
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
47
+ 4. Push to the branch (`git push origin my-new-feature`)
48
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
+ begin
2
+ require 'bundler'
3
+ require "bundler/gem_tasks"
4
+ rescue LoadError => e
5
+ warn e.message
6
+ warn "Run `gem install bundler` to install Bundler."
7
+ exit -1
8
+ end
9
+
10
+ begin
11
+ Bundler.setup(:development)
12
+ rescue Bundler::BundlerError => e
13
+ warn e.message
14
+ warn "Run `bundle install` to install missing gems."
15
+ exit e.status_code
16
+ end
17
+
18
+ require 'rake'
19
+
20
+ begin
21
+ require 'yard'
22
+ YARD::Rake::YardocTask.new
23
+ task :doc => :yard
24
+ rescue LoadError
25
+ # yard not available - we're probably on a prod environment or need to run bundle install
26
+ end
27
+
28
+ begin
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec)
31
+ task :default => :spec
32
+ rescue LoadError
33
+ # rspec not available - we're probably on a prod environment or need to run bundle install
34
+ end
35
+
@@ -0,0 +1,25 @@
1
+ module Traject
2
+ class Profiling
3
+
4
+ module Macros
5
+
6
+ # to_field 'f100ind1', field_ind('100', '1')
7
+ # to_field 'f100ind2', field_ind('100', '2')
8
+ # to_field 'f100subflds', profile_subfields('100')
9
+
10
+ # counts the number of occurrences of a field in a marc record.
11
+ # If no occurrences, accumulator is not altered (field should be missing in output_hash)
12
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
13
+ # @return [lambda] lambda expression appropriate for "to_field", with the number of marc fields
14
+ # matching the tag param added to the lambda's accumulator param
15
+ def field_count(tag)
16
+ return lambda do |record, accumulator, context|
17
+ num_fields = record.fields(tag).size
18
+ accumulator << num_fields.to_s if num_fields > 0
19
+ end
20
+ end
21
+
22
+ end # module Macros
23
+
24
+ end
25
+ end
@@ -0,0 +1,5 @@
1
+ module Traject
2
+ class Profiling
3
+ VERSION = "0.0.1"
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ require "traject/profiling/version"
2
+ require "traject/profiling/field_macros"
3
+
4
+ module Traject
5
+ class Profiling
6
+
7
+ end
8
+ end
@@ -0,0 +1,64 @@
1
+
2
+ RSpec.describe 'field_macros' do
3
+
4
+ let!(:indexer) {
5
+ i = Traject::Indexer.new
6
+ i.instance_eval do
7
+ extend Traject::Profiling::Macros
8
+ end
9
+ i
10
+ }
11
+
12
+ context "field_count" do
13
+ let!(:record) {
14
+ marcxml_str =
15
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
16
+ <leader>01052cam a2200313 i 4500</leader>
17
+ <controlfield tag="001">245a</controlfield>
18
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
19
+ <datafield ind1="1" ind2="0" tag="245">
20
+ <subfield code="a">Slippery noodles</subfield>
21
+ </datafield>
22
+ <datafield tag="700" ind1="1" ind2=" ">
23
+ <subfield code="a">Potter, Harry.</subfield>
24
+ </datafield>
25
+ <datafield tag="700" ind1="1" ind2=" ">
26
+ <subfield code="a">Snape, Severus.</subfield>
27
+ </datafield>
28
+ </record>'
29
+ parse_marc(marcxml_str)
30
+ }
31
+
32
+ it 'single occurrence of tag' do
33
+ indexer.instance_eval do
34
+ to_field '245count', field_count('245')
35
+ end
36
+ output = indexer.map_record(record)
37
+ expect(output['245count']).to eq ['1']
38
+ end
39
+ it 'mult occurrences of tag' do
40
+ indexer.instance_eval do
41
+ to_field '700count', field_count('700')
42
+ end
43
+ output = indexer.map_record(record)
44
+ expect(output['700count']).to eq ['2']
45
+ end
46
+ it 'no occurrences of tag: field not in output_hash' do
47
+ indexer.instance_eval do
48
+ to_field '100count', field_count('100')
49
+ end
50
+ output = indexer.map_record(record)
51
+ expect(output['100count']).to eq nil
52
+ end
53
+ end # field_count
54
+
55
+ end
56
+
57
+ # @param [String] marcxml_str an xml representation of a MARC record
58
+ # @raise [MARC::Exception] if nil returned from MARC::XMLReader
59
+ # @return [MARC::Record] parsed marc_record
60
+ def parse_marc(marcxml_str)
61
+ marc_record = MARC::XMLReader.new(StringIO.new(marcxml_str)).to_a.first
62
+ fail(MARC::Exception, "unable to parse marc record: " + marcxml_str, caller) if marc_record.nil?
63
+ marc_record
64
+ end
@@ -0,0 +1,56 @@
1
+ require 'traject'
2
+ require 'traject/profiling'
3
+
4
+ RSpec.configure do |config|
5
+
6
+ config.expect_with :rspec do |expectations|
7
+ # This option will default to `true` in RSpec 4. It makes the `description`
8
+ # and `failure_message` of custom matchers include text for helper methods
9
+ # defined using `chain`, e.g.:
10
+ # be_bigger_than(2).and_smaller_than(4).description
11
+ # # => "be bigger than 2 and smaller than 4"
12
+ # ...rather than:
13
+ # # => "be bigger than 2"
14
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
15
+ end
16
+
17
+ config.mock_with :rspec do |mocks|
18
+ # Prevents you from mocking or stubbing a method that does not exist on
19
+ # a real object. This is generally recommended, and will default to
20
+ # `true` in RSpec 4.
21
+ mocks.verify_partial_doubles = true
22
+ end
23
+
24
+ # Limits the available syntax to the non-monkey patched syntax that is
25
+ # recommended. For more details, see:
26
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
27
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
28
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
29
+ config.disable_monkey_patching!
30
+
31
+ # This setting enables warnings. It's recommended, but in some cases may
32
+ # be too noisy due to issues in dependencies.
33
+ # config.warnings = true
34
+
35
+ # Many RSpec users commonly either run the entire suite or an individual
36
+ # file, and it's useful to allow more verbose output when running an
37
+ # individual spec file.
38
+ if config.files_to_run.one?
39
+ # Use the documentation formatter for detailed output,
40
+ # unless a formatter has already been configured
41
+ # (e.g. via a command-line flag).
42
+ config.default_formatter = 'doc'
43
+ end
44
+
45
+ # Print the 10 slowest examples and example groups at the
46
+ # end of the spec run, to help surface which specs are running
47
+ # particularly slow.
48
+ #config.profile_examples = 10
49
+
50
+ # Run specs in random order to surface order dependencies. If you find an
51
+ # order dependency and want to debug it, you can fix the order by providing
52
+ # the seed, which is printed after each run.
53
+ # --seed 1234
54
+ config.order = :random
55
+
56
+ end
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib/", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
+ require 'traject/profiling/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = "traject_profiling"
9
+ spec.version = Traject::Profiling::VERSION
10
+ spec.authors = ["Naomi Dushay"]
11
+ spec.email = ["ndushay@stanford.edu"]
12
+ spec.summary = %q{Traject macros to provide profiling information on MARC bibliographic records.}
13
+ spec.description = %q{Profiling macros for MARC bib records; meant to be used with traject to index MARC records into Solr.}
14
+ spec.homepage = "https://github.com/sul-dlss/traject_profiling.git"
15
+ spec.license = "Apache 2.0"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0")
18
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
+ spec.require_paths = ["lib", "lib/traject", "lib/traject/profiling"]
21
+
22
+ spec.add_runtime_dependency 'traject'
23
+
24
+ spec.add_development_dependency "bundler"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "yard"
27
+ spec.add_development_dependency "rspec"
28
+ spec.add_development_dependency "pry"
29
+ spec.add_development_dependency "pry-byebug"
30
+ end
metadata ADDED
@@ -0,0 +1,161 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: traject_profiling
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Naomi Dushay
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: traject
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry-byebug
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Profiling macros for MARC bib records; meant to be used with traject
112
+ to index MARC records into Solr.
113
+ email:
114
+ - ndushay@stanford.edu
115
+ executables: []
116
+ extensions: []
117
+ extra_rdoc_files: []
118
+ files:
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - ".travis.yml"
122
+ - Gemfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - lib/traject/profiling.rb
127
+ - lib/traject/profiling/field_macros.rb
128
+ - lib/traject/profiling/version.rb
129
+ - spec/field_macros_spec.rb
130
+ - spec/spec_helper.rb
131
+ - traject_profiling.gemspec
132
+ homepage: https://github.com/sul-dlss/traject_profiling.git
133
+ licenses:
134
+ - Apache 2.0
135
+ metadata: {}
136
+ post_install_message:
137
+ rdoc_options: []
138
+ require_paths:
139
+ - lib
140
+ - lib/traject
141
+ - lib/traject/profiling
142
+ required_ruby_version: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ required_rubygems_version: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ requirements: []
153
+ rubyforge_project:
154
+ rubygems_version: 2.4.3
155
+ signing_key:
156
+ specification_version: 4
157
+ summary: Traject macros to provide profiling information on MARC bibliographic records.
158
+ test_files:
159
+ - spec/field_macros_spec.rb
160
+ - spec/spec_helper.rb
161
+ has_rdoc: