proiel 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +19 -0
- data/README.md +99 -0
- data/bin/console +6 -0
- data/bin/setup +5 -0
- data/lib/proiel/annotation_schema.rb +127 -0
- data/lib/proiel/citations.rb +84 -0
- data/lib/proiel/div.rb +133 -0
- data/lib/proiel/positional_tag.rb +127 -0
- data/lib/proiel/proiel_xml/proiel-1.0/proiel-1.0.xsd +172 -0
- data/lib/proiel/proiel_xml/proiel-1.0/teilite.xsd +7387 -0
- data/lib/proiel/proiel_xml/proiel-1.0/xml.xsd +287 -0
- data/lib/proiel/proiel_xml/proiel-2.0/proiel-2.0.xsd +185 -0
- data/lib/proiel/proiel_xml/reader.rb +237 -0
- data/lib/proiel/proiel_xml/schema.rb +81 -0
- data/lib/proiel/proiel_xml/validator.rb +177 -0
- data/lib/proiel/sentence.rb +191 -0
- data/lib/proiel/source.rb +114 -0
- data/lib/proiel/statistics.rb +41 -0
- data/lib/proiel/token.rb +407 -0
- data/lib/proiel/tokenization.rb +90 -0
- data/lib/proiel/treebank.rb +214 -0
- data/lib/proiel/treebank_object.rb +21 -0
- data/lib/proiel/version.rb +9 -0
- data/lib/proiel.rb +28 -0
- metadata +210 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6271d2f5d29934447660cf7a2ba1f416a1171b50
|
4
|
+
data.tar.gz: 467914536f5f6794fa84729227b4f0ddff62c2dd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4ec4b448baff57c7faf9b31861667e18cc3f9297dc946c31d6e9e3b491050b39af8e0894eea5ca82c2051fb45d2d90c16b5046568ce780a79f7027beaff323e3
|
7
|
+
data.tar.gz: 1a8c3cf8c2c29b11904bcc67e4e4a0aa557c38bf8c0dbf66deaef36a404cb7deceae2fbefd7fc21d82c208408cf515e291ee2dc960bef56820108cfe390d7847
|
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2015 Marius L. Jøhndal
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# PROIEL treebank utility library
|
2
|
+
|
3
|
+
This is a utility library for reading and manipulating treebanks that use the
|
4
|
+
PROIEL annotation scheme and the PROIEL XML-based interchange format.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
To install this library you need Ruby 2.1 or newer.
|
9
|
+
|
10
|
+
```shell
|
11
|
+
gem install proiel
|
12
|
+
```
|
13
|
+
|
14
|
+
## Getting started
|
15
|
+
|
16
|
+
The recommended way to use this library in your application is with `bundler`.
|
17
|
+
Create a `Gemfile` with the following content:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
source 'https://rubygems.org'
|
21
|
+
gem 'proiel', '~> 1.0'
|
22
|
+
```
|
23
|
+
|
24
|
+
and then execute
|
25
|
+
|
26
|
+
```shell
|
27
|
+
bundle
|
28
|
+
```
|
29
|
+
|
30
|
+
To download a sample treebank, initialize a new git repository and add the
|
31
|
+
[PROIEL treebank](http://proiel.github.io) as a submodule:
|
32
|
+
|
33
|
+
```shell
|
34
|
+
git init
|
35
|
+
mkdir vendor
|
36
|
+
git submodule add --depth 1 https://github.com/proiel/proiel-treebank.git vendor/proiel-treebank
|
37
|
+
```
|
38
|
+
|
39
|
+
Here is a skeleton programme to get you started. Save this as `myproject.rb`:
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
#!/usr/bin/env ruby
|
43
|
+
require 'proiel'
|
44
|
+
|
45
|
+
tb = PROIEL::Treebank.new
|
46
|
+
Dir[File.join('vendor', 'proiel-treebank', '*.xml')].each do |filename|
|
47
|
+
puts "Reading #{filename}..."
|
48
|
+
tb.load_from_xml(filename)
|
49
|
+
end
|
50
|
+
|
51
|
+
tb.sources.each do |source|
|
52
|
+
source.divs.each do |div|
|
53
|
+
div.sentences.each do |sentence|
|
54
|
+
sentence.tokens.each do |token|
|
55
|
+
# Do something
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
You can now run this as:
|
63
|
+
|
64
|
+
```shell
|
65
|
+
bundle exec ruby myproject.rb
|
66
|
+
```
|
67
|
+
|
68
|
+
See the [wiki](https://github.com/proiel/proiel/wiki) for more information.
|
69
|
+
|
70
|
+
## Versioning
|
71
|
+
|
72
|
+
`proiel` aims to adhere to [Semantic Versioning 2.0.0](http://semver.org/spec/v2.0.0.html). This means that a patch version or minor version should not break backward compatibility of a public API, and that breaking changes should only be introduced with new major versions. When specifying a dependency on this gem it is best practice to use a pessimistic version constraint with two digits of precision:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
spec.add_dependency 'proiel', '~> 1.0'
|
76
|
+
```
|
77
|
+
|
78
|
+
## Development
|
79
|
+
|
80
|
+
Check out the git repository from GitHub and run `bin/setup` to install
|
81
|
+
all development dependencies. Then run `rake` to run the tests.
|
82
|
+
|
83
|
+
You can also run `bin/console` for an interactive prompt to experiment with.
|
84
|
+
|
85
|
+
To install a development version of this gem, run `bundle exec rake install`.
|
86
|
+
|
87
|
+
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the gem to [rubygems.org](https://rubygems.org).
|
88
|
+
|
89
|
+
## Documentation
|
90
|
+
|
91
|
+
Documentation can be generated using YARD:
|
92
|
+
|
93
|
+
```sh
|
94
|
+
yard
|
95
|
+
```
|
96
|
+
|
97
|
+
## Contributing
|
98
|
+
|
99
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/proiel/proiel.
|
data/bin/console
ADDED
data/bin/setup
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
  # In-memory view of the annotation schema declared in the header of a PROIEL
  # XML file. Do not confuse this with the PROIEL XML schema, which is what
  # validates the XML markup of a PROIEL XML file.
  class AnnotationSchema
    # @return [Hash<String,PartOfSpeechTagDefinition>] part of speech tag
    #   definitions keyed by tag
    attr_reader :part_of_speech_tags

    # @return [Hash<String,RelationTagDefinition>] relation tag definitions
    #   keyed by tag
    attr_reader :relation_tags

    # @return [Hash<Symbol,Hash<String,MorphologyFieldTagDefinition>>]
    #   morphology tag definitions, keyed first by field and then by tag
    attr_reader :morphology_tags

    # @return [Hash<String,InformationStatusTagDefinition>] information status
    #   tag definitions keyed by tag
    attr_reader :information_status_tags

    # Builds the schema from a parsed header object. Each of the four
    # resulting hashes is frozen.
    #
    # @param xml_object [Object] header object responding to
    #   `parts_of_speech`, `relations`, `morphology` and `information_statuses`
    def initialize(xml_object)
      @part_of_speech_tags = make_part_of_speech_tags(xml_object).freeze
      @relation_tags = make_relation_tags(xml_object).freeze
      @morphology_tags = make_morphology_tags(xml_object).freeze
      @information_status_tags = make_information_status_tags(xml_object).freeze
    end

    # @return [Hash<String,RelationTagDefinition>] the relation tags that are
    #   flagged as primary
    def primary_relations
      @relation_tags.select { |_tag, definition| definition.primary }
    end

    # @return [Hash<String,RelationTagDefinition>] the relation tags that are
    #   flagged as secondary
    def secondary_relations
      @relation_tags.select { |_tag, definition| definition.secondary }
    end

    # Compares two annotation schemas.
    #
    # Only the part of speech tags and the relation tags take part in the
    # comparison; the order in which tags were defined is irrelevant.
    #
    # @param o [AnnotationSchema] schema to compare with
    #
    # @return [true,false]
    #
    def ==(o)
      return false unless @part_of_speech_tags.sort_by(&:first) == o.part_of_speech_tags.sort_by(&:first)

      @relation_tags.sort_by(&:first) == o.relation_tags.sort_by(&:first)
    end

    private

    # Maps every entry in `element.values` from its tag to whatever the given
    # block returns for that entry.
    def make_tag_hash(element)
      element.values.each_with_object({}) do |entry, tags|
        tags[entry.tag] = yield(entry)
      end
    end

    # The primary and secondary flags are true only when the corresponding
    # attribute is the string 'true'.
    def make_relation_tags(xml_object)
      make_tag_hash(xml_object.relations) do |entry|
        RelationTagDefinition.new(entry.summary,
                                  entry.primary == 'true',
                                  entry.secondary == 'true')
      end
    end

    def make_part_of_speech_tags(xml_object)
      make_tag_hash(xml_object.parts_of_speech) do |entry|
        PartOfSpeechTagDefinition.new(entry.summary)
      end
    end

    # Produces one tag hash per morphology field, keyed by the field's tag.
    def make_morphology_tags(xml_object)
      xml_object.morphology.fields.each_with_object({}) do |field, by_field|
        definitions = make_tag_hash(field) do |entry|
          MorphologyFieldTagDefinition.new(entry.summary)
        end

        by_field[field.tag] = definitions
      end
    end

    def make_information_status_tags(xml_object)
      make_tag_hash(xml_object.information_statuses) do |entry|
        InformationStatusTagDefinition.new(entry.summary)
      end
    end
  end

  # A tag definition.
  #
  # @abstract
  class GenericTagDefinition
    # @return [Object] the summary the definition was created with
    attr_reader :summary

    def initialize(summary)
      @summary = summary
    end

    # Two tag definitions are equal when their summaries are equal.
    def ==(o)
      @summary == o.summary
    end
  end

  # Definition of an information status tag.
  class InformationStatusTagDefinition < GenericTagDefinition
  end

  # Definition of a relation tag.
  class RelationTagDefinition < GenericTagDefinition
    # The primary and secondary flags exactly as given to the constructor.
    attr_reader :primary, :secondary

    def initialize(summary, primary, secondary)
      super(summary)

      @primary = primary
      @secondary = secondary
    end

    # Two relation tag definitions are equal when summary, primary flag and
    # secondary flag are all equal.
    def ==(o)
      super && @primary == o.primary && @secondary == o.secondary
    end
  end

  # Definition of a morphology field tag.
  class MorphologyFieldTagDefinition < GenericTagDefinition
  end

  # Definition of a part of speech tag.
  class PartOfSpeechTagDefinition < GenericTagDefinition
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
  module Citations
    # Returns a citation range that spans `cit1` to `cit2`.
    #
    # The regular expression `dividers` is used to chunk the strings, and then
    # the longest common prefix of chunks is removed from `cit2`. `dividers`
    # should be chosen so that the chunks match logical components of a
    # citation, e.g. book titles, chapter numbers and section identifiers.
    #
    # Either citation may be `nil` or an empty string. Such a citation is
    # ignored, and a range that starts and ends with the same citation is
    # reduced to that single citation.
    #
    # @param cit1 [String, nil] first citation in range
    # @param cit2 [String, nil] second citation in range
    # @param dividers [Regexp] dividing elements between components of citation
    #
    # @return [String, nil] the range, or `nil` if neither citation has content
    #
    # @raise [ArgumentError] if `cit1` or `cit2` is neither a string nor `nil`
    #
    # @example
    #   citation_make_range('Matt 5.16', 'Matt 5.27') # => "Matt 5.16–27"
    #   citation_make_range('Matt 4.13', 'Matt 5.27') # => "Matt 4.13–5.27"
    #   citation_make_range('Matt 4.13', nil)         # => "Matt 4.13"
    #
    def self.citation_make_range(cit1, cit2, dividers: /([\s\.]+)/)
      raise ArgumentError, 'string or nil expected' unless cit1.nil? || cit1.is_a?(String)
      # Each argument is validated on its own: a nil cit2 is acceptable, a
      # non-string cit2 is not, whatever the value of cit1.
      raise ArgumentError, 'string or nil expected' unless cit2.nil? || cit2.is_a?(String)

      # Remove any nil and empty-string citation, and reduce a range that starts
      # and ends with the same citation to a single citation.
      citations = [cit1, cit2].reject { |cit| cit.nil? || cit.empty? }.uniq

      case citations.length
      when 0
        nil
      when 1
        citations.first
      else
        # Both citations are present and distinct here, so cit1 and cit2 can
        # be used directly.
        s = citation_strip_prefix(cit1, cit2, dividers: dividers)
        [cit1, s].reject(&:empty?).join("\u{2013}")
      end
    end

    # Returns `cit2` without the longest prefix that `cit1` and `cit2` have in
    # common.
    #
    # The longest common prefix is not computed from the raw strings `cit1` and
    # `cit2` but from string chunks. The regular expression `dividers` is used
    # to chunk the strings, and then the longest prefix of chunks is removed.
    #
    # `dividers` should be chosen so that the chunks match logical components
    # of a citation, e.g. book titles, chapter numbers and section identifiers.
    #
    # @param cit1 [String] first citation in range
    # @param cit2 [String] second citation in range
    # @param dividers [Regexp] dividing elements between components of citation
    #
    # @return [String]
    #
    # @raise [ArgumentError] if `cit1` or `cit2` is not a string
    #
    # @example
    #   citation_strip_prefix('Matt 5.16', 'Matt 5.27') # => "27"
    #   citation_strip_prefix('Matt 5.26', 'Matt 5.27') # => "27"
    #   citation_strip_prefix('Matt 4.13', 'Matt 5.27') # => "5.27"
    #
    def self.citation_strip_prefix(cit1, cit2, dividers: /([\s\.]+)/u)
      raise ArgumentError, 'string expected' unless cit1.is_a?(String)
      raise ArgumentError, 'string expected' unless cit2.is_a?(String)

      x = cit1.split(dividers)
      y = cit2.split(dividers)

      # Interleave x and y but compensate for zip's behaviour when
      # y.length < x.length
      zipped = x.length >= y.length ? x.zip(y) : y.zip(x).map(&:reverse)

      # Nothing is emitted while the chunks agree; from the first differing
      # chunk onwards every chunk of cit2 is appended to the result.
      zipped.inject('') do |d, (a, b)|
        if !d.empty? || a != b
          d + (b || '')
        else
          ''
        end
      end
    end
  end
end
|
data/lib/proiel/div.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
  # A div object in a treebank.
  class Div < TreebankObject
    extend Memoist

    # The ID of the div.
    #
    # PROIEL XML 2.0 lacks IDs for divs while later versions require them. For
    # PROIEL XML 2.0 unique IDs are generated by PROIEL::Treebank.
    #
    # @return [Integer] ID of the div
    attr_reader :id

    # @return [Source] source that the div belongs to
    attr_reader :source

    # @return [nil, String] title of the div
    attr_reader :title

    # @return [nil, String] presentation material before form
    attr_reader :presentation_before

    # @return [nil, String] presentation material after form
    attr_reader :presentation_after

    # Creates a new div object.
    #
    # The optional block is called with the new div and its return value
    # becomes the div's collection of children (its sentences).
    #
    # @param parent [Source] source that the div belongs to
    # @param id [Integer] ID of the div
    # @param title [nil, String] title of the div; frozen in place
    # @param presentation_before [nil, String] presentation material before
    #   form; frozen in place
    # @param presentation_after [nil, String] presentation material after
    #   form; frozen in place
    #
    # @raise [ArgumentError] if `id` is not an integer or one of the text
    #   arguments is neither a string nor `nil`
    def initialize(parent, id, title, presentation_before, presentation_after, &block)
      @source = parent

      raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
      @id = id

      raise ArgumentError, 'string or nil expected' if !title.nil? && !title.is_a?(String)
      @title = title.freeze

      if !presentation_before.nil? && !presentation_before.is_a?(String)
        raise ArgumentError, 'string or nil expected'
      end
      @presentation_before = presentation_before.freeze

      if !presentation_after.nil? && !presentation_after.is_a?(String)
        raise ArgumentError, 'string or nil expected'
      end
      @presentation_after = presentation_after.freeze

      @children = block.call(self) if block
    end

    # @return [Treebank] parent treebank object
    def treebank
      @source.treebank
    end

    # @return [String] language of the div as an ISO 639-3 language tag
    def language
      source.language
    end

    memoize :language

    # @return [String] a complete citation for the div
    def citation
      parts = [source.citation_part, citation_part]
      parts.join(' ')
    end

    # Computes an appropriate citation component for the div.
    #
    # The component is the range from the citation part of the first token
    # that has a citation to that of the last such token. It must be
    # concatenated with the citation component provided by the source to
    # produce a complete citation.
    #
    # @see citation
    #
    # @return [String] the citation component
    def citation_part
      cited = tokens.select(&:has_citation?)
      first_cited = cited.first
      last_cited = cited.last

      range_start = first_cited ? first_cited.citation_part : nil
      range_end = last_cited ? last_cited.citation_part : nil

      Citations.citation_make_range(range_start, range_end)
    end

    # Returns the printable form of the div with all token forms and any
    # presentation data.
    #
    # @param options [Hash] passed on unchanged to each sentence's
    #   `printable_form`
    #
    # @return [String] the printable form of the div
    def printable_form(options = {})
      sentence_forms = @children.map { |sentence| sentence.printable_form(options) }

      # Missing presentation material is nil and is dropped before joining.
      [presentation_before, sentence_forms, presentation_after].compact.join
    end

    # Finds all sentences in the div.
    #
    # @return [Enumerator] sentences in the div
    #
    # @example Iterating sentences
    #   sentences.each { |s| puts s.id }
    #
    # @example Create an array with only reviewed sentences
    #   sentences.select(&:reviewed?)
    #
    # @example Counting sentences
    #   sentences.count #=> 200
    #
    def sentences
      @children.to_enum
    end

    # Finds all tokens in the div.
    #
    # @return [Enumerator] tokens in the div
    #
    # @example Iterating tokens
    #   tokens.each { |t| puts t.id }
    #
    # @example Create an array with only empty tokens
    #   tokens.select(&:is_empty?)
    #
    # @example Counting tokens
    #   puts tokens.count #=> 200
    #
    def tokens
      Enumerator.new do |yielder|
        @children.each do |sentence|
          sentence.tokens.each { |token| yielder << token }
        end
      end
    end
  end
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
  # A positional tag: an ordered list of fields, each of which may hold a
  # value. This base class has no fields at all; a subclass provides them by
  # overriding the `fields` method.
  #
  # @abstract Subclass and override {#fields} to implement a custom positional tag class.
  class PositionalTag
    include Comparable

    # The string representation of a field without a value.
    UNSET_FIELD = '-'.freeze

    # Creates a new positional tag.
    #
    # A string is read one character per field, in the order given by
    # `fields`; a hash maps field names to values; another positional tag is
    # copied field by field.
    #
    # @param value [nil, String, Hash, PositionalTag] initial value
    #
    # @raise [ArgumentError] if `value` is of any other type or names a field
    #   that does not exist
    #
    def initialize(value = nil)
      @fields = {}

      case value
      when nil
        # Nothing to assign: every field stays unset.
      when String
        set_value!(fields.zip(value.split('')).to_h)
      when Hash
        set_value!(value)
      when PositionalTag
        set_value!(value.to_h)
      else
        raise ArgumentError, 'expected nil, Hash, String or PositionalTag'
      end
    end

    # Orders tags by their string representation.
    #
    # @return [Integer] -1, 0 or 1
    #
    def <=>(o)
      to_s <=> o.to_s
    end

    # Returns the positional tag as a string, one value per field.
    #
    # @return [String]
    #
    def to_s
      # Values are read through #[] so that a field without a value is
      # rendered as UNSET_FIELD.
      values = fields.map { |name| self[name] }
      values.join
    end

    # Checks if the tag is uninitialized, i.e. if no field has a value.
    #
    # @return [true, false]
    #
    def empty?
      @fields.empty?
    end

    # Returns a hash representation of the tag. The keys are the names of the
    # fields that have a value, as symbols; the values are the field values.
    #
    # @return [Hash<Symbol, String>]
    #
    def to_h
      @fields
    end

    # Returns the value of a field. A field without a value is returned
    # as `-`.
    #
    # @param field [String, Symbol] name of field
    #
    # @return [String]
    #
    # @raise [ArgumentError] if the field does not exist
    #
    def [](field)
      name = field.to_sym

      raise ArgumentError, "invalid field #{name}" unless fields.include?(name)

      stored = @fields[name]
      stored ? stored : UNSET_FIELD
    end

    # Assigns a value to a field. Assigning `nil` or `-` removes any value
    # the field has.
    #
    # @param field [String, Symbol] name of field
    # @param value [String, nil]
    #
    # @return [String, nil] the assigned value
    #
    # @raise [ArgumentError] if the field does not exist
    #
    def []=(field, value)
      name = field.to_sym

      raise ArgumentError, "invalid field #{name}" unless fields.include?(name)

      if value.nil? || value == UNSET_FIELD
        @fields.delete(name)
      else
        @fields[name] = value
      end

      value
    end

    # Returns the field names. This method should be overridden by
    # implementations. The names should be returned as an array of symbols.
    #
    # @return [Array<Symbol>]
    #
    def fields
      []
    end

    private

    # Assigns each key/value pair of `o` through #[]=.
    def set_value!(o)
      o.each { |name, value| self[name] = value }
    end
  end
end
|