traject_profiling 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7e0af0cc816155f0d24efd1e61b3291aff736b9a
4
+ data.tar.gz: 339785fe91fe7114c85627ee48935c9e1e366904
5
+ SHA512:
6
+ metadata.gz: 8eb43cb020eadf138e87d2dbeedfff53f086507d87060dd0c31649328050979c45903163412ae3bd83a4921cdb5e0fbe6d999018feccbf6a32fa862b3fdfa1fb
7
+ data.tar.gz: f1d6e12641c74351ce0ffec992afc921b8e9120c9666f9f0c1421fa1dbdb2560d63dbf8f893994ece7f0bf957beacc53750c5e64dbca777f8ca782125f023477
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,19 @@
1
+ language: ruby
2
+ script: rake ci
3
+ rvm:
4
+ - ruby-head
5
+ - 2.2.3
6
+ - 2.1.5 # developing here, for now
7
+ - jruby-9.0.0.0
8
+ - jruby-1.7.20
9
+ matrix:
10
+ allow_failures:
11
+ - rvm: ruby-head
12
+ fast_finish: true
13
+ env:
14
+ global:
15
+ - NOKOGIRI_USE_SYSTEM_LIBRARIES=true
16
+ notifications:
17
+ email:
18
+ - ndushay@stanford.edu
19
+ sudo: false
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in traject_profiling.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,12 @@
1
+ Copyright 2015 The Board of Trustees of the Leland Stanford Junior University.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+ http://www.apache.org/licenses/LICENSE-2.0
7
+
8
+ Unless required by applicable law or agreed to in writing, software
9
+ distributed under the License is distributed on an "AS IS" BASIS,
10
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ See the License for the specific language governing permissions and
12
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # traject_profiling
2
+
3
+ Traject macros to provide profiling information on MARC bibliographic records.
4
+
5
+ This code is meant to be used with [traject](http://github.com/traject/traject) to index MARC records into [Solr](http://lucene.apache.org/solr).
6
+
7
+ ## Usage
8
+
9
+ ### A sample traject configuration file using macros from traject_profiling
10
+
11
+ ```ruby
12
+ require 'traject'
13
+ require 'traject/profiling'
14
+ extend Traject::Profiling::Macros
15
+
16
+ to_field 'id', extract_marc('001', :first=>true)
17
+ to_field 'f700count', field_count('700')
18
+ to_field 'f700ind1', field_ind_vals('700', '1')
19
+ to_field 'f700ind2', field_ind_vals('700', '2')
20
+ to_field 'f700subflds', field_subfield_codes('700')
21
+ to_field 'f880_for', f880_tags
22
+ to_field 'f880_for', f880_tags_and_subfields
23
+
24
+ ```
25
+
26
+ ## Installation
27
+
28
+ Add this line to your application's Gemfile:
29
+
30
+ ```ruby
31
+ gem 'traject_profiling'
32
+ ```
33
+
34
+ And then execute:
35
+
36
+ $ bundle
37
+
38
+ Or install it yourself as:
39
+
40
+ $ gem install traject_profiling
41
+
42
+ ## Contributing
43
+
44
+ 1. Fork it ( https://github.com/sul-dlss/traject_profiling/fork )
45
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
46
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
47
+ 4. Push to the branch (`git push origin my-new-feature`)
48
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
+ begin
2
+ require 'bundler'
3
+ require "bundler/gem_tasks"
4
+ rescue LoadError => e
5
+ warn e.message
6
+ warn "Run `gem install bundler` to install Bundler."
7
+ exit -1
8
+ end
9
+
10
+ begin
11
+ Bundler.setup(:development)
12
+ rescue Bundler::BundlerError => e
13
+ warn e.message
14
+ warn "Run `bundle install` to install missing gems."
15
+ exit e.status_code
16
+ end
17
+
18
+ require 'rake'
19
+
20
+ begin
21
+ require 'yard'
22
+ YARD::Rake::YardocTask.new
23
+ task :doc => :yard
24
+ rescue LoadError
25
+ # yard not available - we're probably on a prod environment or need to run bundle install
26
+ end
27
+
28
+ begin
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec)
31
+ task :default => :spec
32
+ rescue LoadError
33
+ # rspec not available - we're probably on a prod environment or need to run bundle install
34
+ end
35
+
@@ -0,0 +1,25 @@
1
+ module Traject
2
+ class Profiling
3
+
4
+ module Macros
5
+
6
+ # to_field 'f100ind1', field_ind('100', '1')
7
+ # to_field 'f100ind2', field_ind('100', '2')
8
+ # to_field 'f100subflds', profile_subfields('100')
9
+
10
+ # counts the number of occurrences of a field in a marc record.
11
+ # If no occurrences, accumulator is not altered (field should be missing in output_hash)
12
+ # @param [String] tag - marc field tag; three chars (usually but not necessarily numeric)
13
+ # @return [lambda] lambda expression appropriate for "to_field", with the number of marc fields
14
+ #   matching the tag param added to the lambda's accumulator param
15
+ def field_count(tag)
16
+ return lambda do |record, accumulator, context|
17
+ num_fields = record.fields(tag).size
18
+ accumulator << num_fields.to_s if num_fields > 0
19
+ end
20
+ end
21
+
22
+ end # module Macros
23
+
24
+ end
25
+ end
@@ -0,0 +1,5 @@
1
+ module Traject
2
+ class Profiling
3
+ VERSION = "0.0.1"
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ require "traject/profiling/version"
2
+ require "traject/profiling/field_macros"
3
+
4
+ module Traject
5
+ class Profiling
6
+
7
+ end
8
+ end
@@ -0,0 +1,64 @@
1
+
2
+ RSpec.describe 'field_macros' do
3
+
4
+ let!(:indexer) {
5
+ i = Traject::Indexer.new
6
+ i.instance_eval do
7
+ extend Traject::Profiling::Macros
8
+ end
9
+ i
10
+ }
11
+
12
+ context "field_count" do
13
+ let!(:record) {
14
+ marcxml_str =
15
+ '<record xmlns="http://www.loc.gov/MARC21/slim">
16
+ <leader>01052cam a2200313 i 4500</leader>
17
+ <controlfield tag="001">245a</controlfield>
18
+ <controlfield tag="008">140604t20152015enk b 001 0 eng d</controlfield>
19
+ <datafield ind1="1" ind2="0" tag="245">
20
+ <subfield code="a">Slippery noodles</subfield>
21
+ </datafield>
22
+ <datafield tag="700" ind1="1" ind2=" ">
23
+ <subfield code="a">Potter, Harry.</subfield>
24
+ </datafield>
25
+ <datafield tag="700" ind1="1" ind2=" ">
26
+ <subfield code="a">Snape, Severus.</subfield>
27
+ </datafield>
28
+ </record>'
29
+ parse_marc(marcxml_str)
30
+ }
31
+
32
+ it 'single occurrence of tag' do
33
+ indexer.instance_eval do
34
+ to_field '245count', field_count('245')
35
+ end
36
+ output = indexer.map_record(record)
37
+ expect(output['245count']).to eq ['1']
38
+ end
39
+ it 'mult occurrences of tag' do
40
+ indexer.instance_eval do
41
+ to_field '700count', field_count('700')
42
+ end
43
+ output = indexer.map_record(record)
44
+ expect(output['700count']).to eq ['2']
45
+ end
46
+ it 'no occurrences of tag: field not in output_hash' do
47
+ indexer.instance_eval do
48
+ to_field '100count', field_count('100')
49
+ end
50
+ output = indexer.map_record(record)
51
+ expect(output['100count']).to eq nil
52
+ end
53
+ end # field_count
54
+
55
+ end
56
+
57
+ # @param [String] marcxml_str an xml representation of a MARC record
58
+ # @raise [MARC::Exception] if nil returned from MARC::XMLReader
59
+ # @return [MARC::Record] parsed marc_record
60
+ def parse_marc(marcxml_str)
61
+ marc_record = MARC::XMLReader.new(StringIO.new(marcxml_str)).to_a.first
62
+ fail(MARC::Exception, "unable to parse marc record: " + marcxml_str, caller) if marc_record.nil?
63
+ marc_record
64
+ end
@@ -0,0 +1,56 @@
1
+ require 'traject'
2
+ require 'traject/profiling'
3
+
4
+ RSpec.configure do |config|
5
+
6
+ config.expect_with :rspec do |expectations|
7
+ # This option will default to `true` in RSpec 4. It makes the `description`
8
+ # and `failure_message` of custom matchers include text for helper methods
9
+ # defined using `chain`, e.g.:
10
+ # be_bigger_than(2).and_smaller_than(4).description
11
+ # # => "be bigger than 2 and smaller than 4"
12
+ # ...rather than:
13
+ # # => "be bigger than 2"
14
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
15
+ end
16
+
17
+ config.mock_with :rspec do |mocks|
18
+ # Prevents you from mocking or stubbing a method that does not exist on
19
+ # a real object. This is generally recommended, and will default to
20
+ # `true` in RSpec 4.
21
+ mocks.verify_partial_doubles = true
22
+ end
23
+
24
+ # Limits the available syntax to the non-monkey patched syntax that is
25
+ # recommended. For more details, see:
26
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
27
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
28
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
29
+ config.disable_monkey_patching!
30
+
31
+ # This setting enables warnings. It's recommended, but in some cases may
32
+ # be too noisy due to issues in dependencies.
33
+ # config.warnings = true
34
+
35
+ # Many RSpec users commonly either run the entire suite or an individual
36
+ # file, and it's useful to allow more verbose output when running an
37
+ # individual spec file.
38
+ if config.files_to_run.one?
39
+ # Use the documentation formatter for detailed output,
40
+ # unless a formatter has already been configured
41
+ # (e.g. via a command-line flag).
42
+ config.default_formatter = 'doc'
43
+ end
44
+
45
+ # Print the 10 slowest examples and example groups at the
46
+ # end of the spec run, to help surface which specs are running
47
+ # particularly slow.
48
+ #config.profile_examples = 10
49
+
50
+ # Run specs in random order to surface order dependencies. If you find an
51
+ # order dependency and want to debug it, you can fix the order by providing
52
+ # the seed, which is printed after each run.
53
+ # --seed 1234
54
+ config.order = :random
55
+
56
+ end
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib/", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
+ require 'traject/profiling/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = "traject_profiling"
9
+ spec.version = Traject::Profiling::VERSION
10
+ spec.authors = ["Naomi Dushay"]
11
+ spec.email = ["ndushay@stanford.edu"]
12
+ spec.summary = %q{Traject macros to provide profiling information on MARC bibliographic records.}
13
+ spec.description = %q{Profiling macros for MARC bib records; meant to be used with traject to index MARC records into Solr.}
14
+ spec.homepage = "https://github.com/sul-dlss/traject_profiling.git"
15
+ spec.license = "Apache 2.0"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0")
18
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
+ spec.require_paths = ["lib", "lib/traject", "lib/traject/profiling"]
21
+
22
+ spec.add_runtime_dependency 'traject'
23
+
24
+ spec.add_development_dependency "bundler"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "yard"
27
+ spec.add_development_dependency "rspec"
28
+ spec.add_development_dependency "pry"
29
+ spec.add_development_dependency "pry-byebug"
30
+ end
metadata ADDED
@@ -0,0 +1,161 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: traject_profiling
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Naomi Dushay
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: traject
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry-byebug
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Profiling macros for MARC bib records; meant to be used with traject
112
+ to index MARC records into Solr.
113
+ email:
114
+ - ndushay@stanford.edu
115
+ executables: []
116
+ extensions: []
117
+ extra_rdoc_files: []
118
+ files:
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - ".travis.yml"
122
+ - Gemfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - lib/traject/profiling.rb
127
+ - lib/traject/profiling/field_macros.rb
128
+ - lib/traject/profiling/version.rb
129
+ - spec/field_macros_spec.rb
130
+ - spec/spec_helper.rb
131
+ - traject_profiling.gemspec
132
+ homepage: https://github.com/sul-dlss/traject_profiling.git
133
+ licenses:
134
+ - Apache 2.0
135
+ metadata: {}
136
+ post_install_message:
137
+ rdoc_options: []
138
+ require_paths:
139
+ - lib
140
+ - lib/traject
141
+ - lib/traject/profiling
142
+ required_ruby_version: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ required_rubygems_version: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ requirements: []
153
+ rubyforge_project:
154
+ rubygems_version: 2.4.3
155
+ signing_key:
156
+ specification_version: 4
157
+ summary: Traject macros to provide profiling information on MARC bibliographic records.
158
+ test_files:
159
+ - spec/field_macros_spec.rb
160
+ - spec/spec_helper.rb
161
+ has_rdoc: