proiel 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +19 -0
- data/README.md +99 -0
- data/bin/console +6 -0
- data/bin/setup +5 -0
- data/lib/proiel/annotation_schema.rb +127 -0
- data/lib/proiel/citations.rb +84 -0
- data/lib/proiel/div.rb +133 -0
- data/lib/proiel/positional_tag.rb +127 -0
- data/lib/proiel/proiel_xml/proiel-1.0/proiel-1.0.xsd +172 -0
- data/lib/proiel/proiel_xml/proiel-1.0/teilite.xsd +7387 -0
- data/lib/proiel/proiel_xml/proiel-1.0/xml.xsd +287 -0
- data/lib/proiel/proiel_xml/proiel-2.0/proiel-2.0.xsd +185 -0
- data/lib/proiel/proiel_xml/reader.rb +237 -0
- data/lib/proiel/proiel_xml/schema.rb +81 -0
- data/lib/proiel/proiel_xml/validator.rb +177 -0
- data/lib/proiel/sentence.rb +191 -0
- data/lib/proiel/source.rb +114 -0
- data/lib/proiel/statistics.rb +41 -0
- data/lib/proiel/token.rb +407 -0
- data/lib/proiel/tokenization.rb +90 -0
- data/lib/proiel/treebank.rb +214 -0
- data/lib/proiel/treebank_object.rb +21 -0
- data/lib/proiel/version.rb +9 -0
- data/lib/proiel.rb +28 -0
- metadata +210 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6271d2f5d29934447660cf7a2ba1f416a1171b50
|
4
|
+
data.tar.gz: 467914536f5f6794fa84729227b4f0ddff62c2dd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4ec4b448baff57c7faf9b31861667e18cc3f9297dc946c31d6e9e3b491050b39af8e0894eea5ca82c2051fb45d2d90c16b5046568ce780a79f7027beaff323e3
|
7
|
+
data.tar.gz: 1a8c3cf8c2c29b11904bcc67e4e4a0aa557c38bf8c0dbf66deaef36a404cb7deceae2fbefd7fc21d82c208408cf515e291ee2dc960bef56820108cfe390d7847
|
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2015 Marius L. Jøhndal
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# PROIEL treebank utility library
|
2
|
+
|
3
|
+
This is a utility library for reading and manipulating treebanks that use the
|
4
|
+
PROIEL annotation scheme and the PROIEL XML-based interchange format.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
To install this library you need Ruby 2.1 or newer.
|
9
|
+
|
10
|
+
```shell
|
11
|
+
gem install proiel
|
12
|
+
```
|
13
|
+
|
14
|
+
## Getting started
|
15
|
+
|
16
|
+
The recommended way to use this library in your application is with `bundler`.
|
17
|
+
Create a `Gemfile` with the following content:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
source 'https://rubygems.org'
|
21
|
+
gem 'proiel', '~> 1.0'
|
22
|
+
```
|
23
|
+
|
24
|
+
and then execute
|
25
|
+
|
26
|
+
```shell
|
27
|
+
bundle
|
28
|
+
```
|
29
|
+
|
30
|
+
To download a sample treebank, initialize a new git repository and add the
|
31
|
+
[PROIEL treebank](http://proiel.github.io) as a submodule:
|
32
|
+
|
33
|
+
```shell
|
34
|
+
git init
|
35
|
+
mkdir vendor
|
36
|
+
git submodule add --depth 1 https://github.com/proiel/proiel-treebank.git vendor/proiel-treebank
|
37
|
+
```
|
38
|
+
|
39
|
+
Here is a skeleton programme to get you started. Save this as `myproject.rb`:
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
#!/usr/bin/env ruby
|
43
|
+
require 'proiel'
|
44
|
+
|
45
|
+
tb = PROIEL::Treebank.new
|
46
|
+
Dir[File.join('vendor', 'proiel-treebank', '*.xml')].each do |filename|
|
47
|
+
puts "Reading #{filename}..."
|
48
|
+
tb.load_from_xml(filename)
|
49
|
+
end
|
50
|
+
|
51
|
+
tb.sources.each do |source|
|
52
|
+
source.divs.each do |div|
|
53
|
+
div.sentences.each do |sentence|
|
54
|
+
sentence.tokens.each do |token|
|
55
|
+
# Do something
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
You can now run this as:
|
63
|
+
|
64
|
+
```shell
|
65
|
+
bundle exec ruby myproject.rb
|
66
|
+
```
|
67
|
+
|
68
|
+
See the [wiki](https://github.com/proiel/proiel/wiki) for more information.
|
69
|
+
|
70
|
+
## Versioning
|
71
|
+
|
72
|
+
`proiel` aims to adhere to [Semantic Versioning 2.0.0](http://semver.org/spec/v2.0.0.html). This means that a patch version or minor version should not break backward compatibility of a public API, and that breaking changes should only be introduced with new major versions. When specifying a dependency on this gem it is best practice to use a pessimistic version constraint with two digits of precision:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
spec.add_dependency 'proiel', '~> 1.0'
|
76
|
+
```
|
77
|
+
|
78
|
+
## Development
|
79
|
+
|
80
|
+
Check out the git repository from GitHub and run `bin/setup` to install
|
81
|
+
all development dependencies. Then run `rake` to run the tests.
|
82
|
+
|
83
|
+
You can also run `bin/console` for an interactive prompt to experiment with.
|
84
|
+
|
85
|
+
To install a development version of this gem, run `bundle exec rake install`.
|
86
|
+
|
87
|
+
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the gem to [rubygems.org](https://rubygems.org).
|
88
|
+
|
89
|
+
## Documentation
|
90
|
+
|
91
|
+
Documentation can be generated using YARD:
|
92
|
+
|
93
|
+
```sh
|
94
|
+
yard
|
95
|
+
```
|
96
|
+
|
97
|
+
## Contributing
|
98
|
+
|
99
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/proiel/proiel.
|
data/bin/console
ADDED
data/bin/setup
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
# A representation of the annotation schema found in the header of a PROIEL
|
8
|
+
# XML file. This should not be confused with the PROIEL XML schema, which is
|
9
|
+
# used for validating the XML in a PROIEL XML file.
|
10
|
+
class AnnotationSchema
  # @return [Hash<String,PartOfSpeechTagDefinition>] definition of part of speech tags
  attr_reader :part_of_speech_tags

  # @return [Hash<String,RelationTagDefinition>] definition of relation tags
  attr_reader :relation_tags

  # @return [Hash<Symbol,Hash<String,MorphologyFieldTagDefinition>>] definition of morphology tags
  attr_reader :morphology_tags

  # @return [Hash<String,InformationStatusTagDefinition>] definition of information status tags
  attr_reader :information_status_tags

  # Creates a new annotation schema object.
  #
  # Each of the four tag tables is built from `xml_object` and then frozen.
  # Only the outer hashes are frozen; the per-field hashes nested inside
  # `morphology_tags` are left as built.
  #
  # @param xml_object [#parts_of_speech, #relations, #morphology, #information_statuses]
  #   parsed annotation element to read the tag definitions from
  def initialize(xml_object)
    @part_of_speech_tags = make_part_of_speech_tags(xml_object).freeze
    @relation_tags = make_relation_tags(xml_object).freeze
    @morphology_tags = make_morphology_tags(xml_object).freeze
    @information_status_tags = make_information_status_tags(xml_object).freeze
  end

  # @return [Hash<String,RelationTagDefinition>] definition of primary relation tags
  def primary_relations
    @relation_tags.select { |_, definition| definition.primary }
  end

  # @return [Hash<String,RelationTagDefinition>] definition of secondary relation tags
  def secondary_relations
    @relation_tags.select { |_, definition| definition.secondary }
  end

  # Tests for equality of two annotation schema objects.
  #
  # Two schemas are equal when their part of speech tags and their relation
  # tags are pairwise equal, irrespective of the order in which the tags were
  # defined. Any object that does not expose both tag tables is unequal.
  #
  # NOTE(review): morphology tags and information status tags do not take
  # part in the comparison; confirm whether that is intended.
  #
  # @param o [Object] object to compare with
  #
  # @return [true,false]
  #
  def ==(o)
    # Comparing with nil or an unrelated object must answer false, not raise.
    return false unless o.respond_to?(:part_of_speech_tags) && o.respond_to?(:relation_tags)

    @part_of_speech_tags.sort_by(&:first) == o.part_of_speech_tags.sort_by(&:first) &&
      @relation_tags.sort_by(&:first) == o.relation_tags.sort_by(&:first)
  end

  private

  # Builds a hash that maps the tag of each value under `element` to whatever
  # the given block returns for that value.
  def make_tag_hash(element)
    element.values.map { |e| [e.tag, yield(e)] }.to_h
  end

  # Relation entries carry their primary/secondary flags as the strings
  # 'true'/'false'; they are converted to booleans here.
  def make_relation_tags(xml_object)
    make_tag_hash(xml_object.relations) do |e|
      RelationTagDefinition.new(e.summary, e.primary == 'true', e.secondary == 'true')
    end
  end

  def make_part_of_speech_tags(xml_object)
    make_tag_hash(xml_object.parts_of_speech) do |e|
      PartOfSpeechTagDefinition.new(e.summary)
    end
  end

  # Builds one tag hash per morphology field, keyed by the tag of the field.
  def make_morphology_tags(xml_object)
    xml_object.morphology.fields.map do |field|
      definitions =
        make_tag_hash(field) do |e|
          MorphologyFieldTagDefinition.new(e.summary)
        end

      [field.tag, definitions]
    end.to_h
  end

  def make_information_status_tags(xml_object)
    make_tag_hash(xml_object.information_statuses) do |e|
      InformationStatusTagDefinition.new(e.summary)
    end
  end
end
|
84
|
+
|
85
|
+
# A tag definition.
|
86
|
+
#
|
87
|
+
# @abstract
|
88
|
+
class GenericTagDefinition
  # @return the summary the definition was created with
  attr_reader :summary

  # Creates a new tag definition.
  #
  # @param summary summary text describing the tag
  def initialize(summary)
    @summary = summary
  end

  # Tests equality of two tag definitions.
  #
  # Definitions are equal when their summaries are equal. An object that has
  # no `summary` (for example `nil`) is never equal to a tag definition.
  #
  # @param o [Object] object to compare with
  #
  # @return [true, false]
  def ==(o)
    # Guard first so that comparing with an unrelated object answers false
    # rather than raising NoMethodError.
    o.respond_to?(:summary) && @summary == o.summary
  end
end
|
100
|
+
|
101
|
+
# Tag definition for an information status tag. Adds nothing to
# GenericTagDefinition beyond a distinct class.
class InformationStatusTagDefinition < GenericTagDefinition
end
|
103
|
+
|
104
|
+
# Definition of a relation tag.
|
105
|
+
class RelationTagDefinition < GenericTagDefinition
  # @return whether the relation was flagged as primary at construction
  attr_reader :primary

  # @return whether the relation was flagged as secondary at construction
  attr_reader :secondary

  # Creates a new relation tag definition.
  #
  # @param summary summary text describing the tag
  # @param primary flag stored as {#primary}
  # @param secondary flag stored as {#secondary}
  def initialize(summary, primary, secondary)
    super(summary)

    @primary = primary
    @secondary = secondary
  end

  # Tests equality of two tag definitions.
  #
  # Definitions are equal when summary, primary flag and secondary flag are
  # all equal. An object lacking any of these readers (for example `nil`) is
  # never equal to a relation tag definition.
  #
  # @param o [Object] object to compare with
  #
  # @return [true, false]
  def ==(o)
    # Guard first so that comparing with an unrelated object answers false
    # rather than raising NoMethodError.
    return false unless %i[summary primary secondary].all? { |reader| o.respond_to?(reader) }

    @summary == o.summary && @primary == o.primary && @secondary == o.secondary
  end
end
|
121
|
+
|
122
|
+
# Tag definition for a value of a morphology field. Adds nothing to
# GenericTagDefinition beyond a distinct class.
class MorphologyFieldTagDefinition < GenericTagDefinition
end
|
124
|
+
|
125
|
+
# Tag definition for a part of speech tag. Adds nothing to
# GenericTagDefinition beyond a distinct class.
class PartOfSpeechTagDefinition < GenericTagDefinition
end
|
127
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
# Helpers for abbreviating citations and building citation ranges.
module Citations
  # Returns a citation range that spans `cit1` to `cit2`.
  #
  # The regular expression `dividers` is used to chunk the strings, and then
  # the longest common prefix of chunks is removed from `cit2`. `dividers`
  # should be chosen so that the chunks match logical components of a
  # citation, e.g. book titles, chapter numbers and section identifiers.
  #
  # A `nil` or empty citation is ignored, so a range with only one usable
  # endpoint is returned as that single citation, and a range without any
  # usable endpoint is returned as `nil`.
  #
  # @param cit1 [String, nil] first citation in range
  # @param cit2 [String, nil] second citation in range
  # @param dividers [Regexp] dividing elements between components of citation
  #
  # @return [String, nil]
  #
  # @raise [ArgumentError] if `cit1` or `cit2` is neither a string nor `nil`
  #
  # @example
  #   citation_make_range('Matt 5.16', 'Matt 5.27') # => "Matt 5.16–27"
  #   citation_make_range('Matt 4.13', 'Matt 5.27') # => "Matt 4.13–5.27"
  #   citation_make_range('Matt 4.13', nil)         # => "Matt 4.13"
  #
  def self.citation_make_range(cit1, cit2, dividers: /([\s\.]+)/)
    raise ArgumentError unless cit1.nil? || cit1.is_a?(String)
    raise ArgumentError unless cit2.nil? || cit2.is_a?(String)

    # Remove any nil and empty-string citation, and reduce a range that starts
    # and ends with the same citation to a single citation.
    endpoints = [cit1, cit2].reject { |cit| cit.nil? || cit.empty? }.uniq

    case endpoints.length
    when 0
      nil
    when 1
      endpoints.first
    else
      # Two distinct endpoints remain, so both cit1 and cit2 are non-empty
      # strings at this point.
      suffix = citation_strip_prefix(cit1, cit2, dividers: dividers)
      [cit1, suffix].reject(&:empty?).join("\u{2013}")
    end
  end

  # Returns `cit2` without the longest prefix that `cit1` and `cit2` have in
  # common.
  #
  # The longest common prefix is not computed from the raw strings `cit1` and
  # `cit2` but from string chunks. The regular expression `dividers` is used
  # to chunk the strings, and then the longest prefix of chunks is removed.
  #
  # `dividers` should be chosen so that the chunks match logical components
  # of a citation, e.g. book titles, chapter numbers and section identifiers.
  #
  # @param cit1 [String] first citation in range
  # @param cit2 [String] second citation in range
  # @param dividers [Regexp] dividing elements between components of citation
  #
  # @return [String]
  #
  # @raise [ArgumentError] if `cit1` or `cit2` is not a string
  #
  # @example
  #   citation_strip_prefix('Matt 5.16', 'Matt 5.27') # => "27"
  #   citation_strip_prefix('Matt 5.26', 'Matt 5.27') # => "27"
  #   citation_strip_prefix('Matt 4.13', 'Matt 5.27') # => "5.27"
  #
  def self.citation_strip_prefix(cit1, cit2, dividers: /([\s\.]+)/u)
    raise ArgumentError unless cit1.is_a?(String)
    raise ArgumentError unless cit2.is_a?(String)

    x, y = cit1.split(dividers), cit2.split(dividers)

    # Interleave x and y but compensate for zip's behaviour when
    # y.length < x.length
    zipped = x.length >= y.length ? x.zip(y) : y.zip(x).map(&:reverse)

    zipped.inject('') do |suffix, (a, b)|
      if suffix.empty? && a == b
        # Still inside the common prefix: keep dropping chunks.
        ''
      else
        # The citations have diverged: keep every remaining chunk of cit2
        # (b is nil once cit2 has run out of chunks).
        suffix + (b || '')
      end
    end
  end
end
|
84
|
+
end
|
data/lib/proiel/div.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
# A div object in a treebank.
|
8
|
+
class Div < TreebankObject
  extend Memoist

  # The ID of the div.
  #
  # PROIEL XML 2.0 has no IDs for divs, whereas later versions require them.
  # For PROIEL XML 2.0 unique IDs are generated by PROIEL::Treebank.
  #
  # @return [Integer] ID of the div
  attr_reader :id

  # @return [Source] the source the div is part of
  attr_reader :source

  # @return [nil, String] the title of the div
  attr_reader :title

  # @return [nil, String] presentation material that precedes the form
  attr_reader :presentation_before

  # @return [nil, String] presentation material that follows the form
  attr_reader :presentation_after

  # Creates a new div object.
  #
  # Each argument is validated and stored in turn. The strings that are
  # stored are frozen. If a block is given it is called with the new div and
  # its return value becomes the children (sentences) of the div.
  #
  # @raise [ArgumentError] if `id` is not an integer, or if `title`,
  #   `presentation_before` or `presentation_after` is neither a string nor
  #   `nil`
  def initialize(parent, id, title, presentation_before, presentation_after, &block)
    @source = parent

    raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
    @id = id

    raise ArgumentError, 'string or nil expected' if !title.nil? && !title.is_a?(String)
    @title = title.freeze

    if !presentation_before.nil? && !presentation_before.is_a?(String)
      raise ArgumentError, 'string or nil expected'
    end
    @presentation_before = presentation_before.freeze

    if !presentation_after.nil? && !presentation_after.is_a?(String)
      raise ArgumentError, 'string or nil expected'
    end
    @presentation_after = presentation_after.freeze

    # Without a block the children are left unset.
    @children = block.call(self) if block
  end

  # @return [Treebank] the treebank that the source of the div belongs to
  def treebank
    @source.treebank
  end

  # @return [String] language of the div as an ISO 639-3 language tag
  def language
    source.language
  end

  memoize :language

  # @return [String] a complete citation for the div
  def citation
    parts = [source.citation_part, citation_part]
    parts.join(' ')
  end

  # Computes an appropriate citation component for the div.
  #
  # The component is a range that runs from the citation part of the first
  # token that has a citation to that of the last such token. It has to be
  # combined with the citation component of the source to give a complete
  # citation.
  #
  # @see citation
  #
  # @return [String] the citation component
  def citation_part
    cited = tokens.select(&:has_citation?)
    from, to = [cited.first, cited.last].map { |token| token ? token.citation_part : nil }

    Citations.citation_make_range(from, to)
  end

  # Returns the printable form of the div: the presentation material before
  # it, the printable form of every sentence, and the presentation material
  # after it, concatenated.
  #
  # @param options [Hash] passed on unchanged to each sentence
  #
  # @return [String] the printable form of the div
  def printable_form(options = {})
    sentence_forms = @children.map { |sentence| sentence.printable_form(options) }

    [presentation_before, sentence_forms, presentation_after].compact.join
  end

  # Finds all sentences in the div.
  #
  # @return [Enumerator] sentences in the div
  #
  # @example Iterating sentences
  #   sentences.each { |s| puts s.id }
  #
  # @example Create an array with only reviewed sentences
  #   sentences.select(&:reviewed?)
  #
  # @example Counting sentences
  #   sentences.count #=> 200
  #
  def sentences
    @children.to_enum
  end

  # Finds all tokens in the div, sentence by sentence.
  #
  # @return [Enumerator] tokens in the div
  #
  # @example Iterating tokens
  #   tokens.each { |t| puts t.id }
  #
  # @example Create an array with only empty tokens
  #   tokens.select(&:is_empty?)
  #
  # @example Counting tokens
  #   puts tokens.count #=> 200
  #
  def tokens
    Enumerator.new do |yielder|
      @children.each do |sentence|
        sentence.tokens.each { |token| yielder << token }
      end
    end
  end
end
|
133
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2015 Marius L. Jøhndal
|
3
|
+
#
|
4
|
+
# See LICENSE in the top-level source directory for licensing terms.
|
5
|
+
#++
|
6
|
+
module PROIEL
|
7
|
+
# Represents a positional tag, which consists of one or more fields each with
|
8
|
+
# its own value. The default implementation is of a positional tag with no
|
9
|
+
# fields. The class should be subclassed and the `fields` method overridden
|
10
|
+
# to implement a non-empty positional tag.
|
11
|
+
#
|
12
|
+
# @abstract Subclass and override {#fields} to implement a custom positional tag class.
|
13
|
+
class PositionalTag
  include Comparable

  # The string representation of a field without a value.
  UNSET_FIELD = '-'.freeze

  # Creates a new positional tag.
  #
  # A string is read one character per field, in the order given by
  # {#fields}. A hash maps field names to values. Another positional tag is
  # copied field by field.
  #
  # @param value [String, Hash, PositionalTag, nil] initial value
  #
  # @raise [ArgumentError] if `value` has any other type, or names a field
  #   that is not in {#fields}
  #
  def initialize(value = nil)
    @fields = {}

    return if value.nil?

    initial =
      case value
      when String then fields.zip(value.split('')).to_h
      when Hash then value
      when PositionalTag then value.to_h
      else raise ArgumentError, 'expected nil, Hash, String or PositionalTag'
      end

    set_value!(initial)
  end

  # Compares the string forms of the two tags and returns an integer, -1, 0
  # or 1, suitable for sorting.
  #
  # @return [Integer]
  #
  def <=>(o)
    mine = to_s
    theirs = o.to_s

    mine <=> theirs
  end

  # Returns the positional tag as a string.
  #
  # @return [String]
  #
  def to_s
    # Go through the declared fields, not the stored values, so that a
    # field without a value comes out as UNSET_FIELD.
    fields.map { |name| self[name] }.join
  end

  # Checks if the tag is uninitialized. The tag is uninitialized if no field
  # has a value.
  #
  # @return [true, false]
  #
  def empty?
    @fields.size.zero?
  end

  # Returns a hash representation of the tag. The keys are the names of each
  # field as symbols, the values are the values of each field. Fields
  # without a value are absent. The hash returned is the one the tag stores
  # its values in, not a copy.
  #
  # @return [Hash<Symbol, String>]
  #
  def to_h
    @fields
  end

  # Returns the value of a field. A field without a value is returned
  # as `-`.
  #
  # @param field [String, Symbol] name of field
  #
  # @return [String]
  #
  # @raise [ArgumentError] if the field is not in {#fields}
  #
  def [](field)
    name = field.to_sym

    raise ArgumentError, "invalid field #{name}" unless fields.include?(name)

    stored = @fields[name]
    stored ? stored : UNSET_FIELD
  end

  # Assigns a value to a field. Removing any value from a field can be done
  # by assigning `nil` or `-`.
  #
  # @param field [String, Symbol] name of field
  # @param value [String, nil]
  #
  # @return [String]
  #
  # @raise [ArgumentError] if the field is not in {#fields}
  #
  def []=(field, value)
    name = field.to_sym

    raise ArgumentError, "invalid field #{name}" unless fields.include?(name)

    if value.nil? || value == UNSET_FIELD
      @fields.delete(name)
    else
      @fields[name] = value
    end

    value
  end

  # Returns the field names. This method should be overridden by
  # implementations. The names should be returned as an array of symbols.
  #
  # @return [Array<Symbol>]
  #
  def fields
    []
  end

  private

  # Assigns every field/value pair in `o` through #[]=, so that each field
  # name is validated and unset markers are dropped.
  def set_value!(o)
    o.each do |name, field_value|
      self[name] = field_value
    end
  end
end
|
127
|
+
end
|