wapiti 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wapiti/dataset.rb +21 -21
- data/lib/wapiti/sequence.rb +8 -8
- data/lib/wapiti/token.rb +3 -3
- data/lib/wapiti/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eefd2c624bb02b635f41b9577d303abc352783d2530ef42f3db7f91db2384174
|
4
|
+
data.tar.gz: 73c766d6e05599b5167743dfc53daf20f74db0af0fe22d3ea6a947e9882189ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 012c48b99ce4d6af1223f97fd03308f5af0c833e4db250842546b7f75de830fa700f71d52043fdd6c2b54f1f24cc83c0a0929211bf23153113d6efadf619fb68
|
7
|
+
data.tar.gz: 1a78e6de9025f6f6e199ff70e0973a7aeb42beda218220f3c63801a8239cd5371bf4546796db82134646d4491fa9cf00072371cfe058a4632ab3946b621a32bd
|
data/lib/wapiti/dataset.rb
CHANGED
@@ -12,7 +12,7 @@ module Wapiti
|
|
12
12
|
def_delegators :sequences, :[], :empty?, :length, :size, :slice!, :uniq!
|
13
13
|
|
14
14
|
class << self
|
15
|
-
def parse(dataset, separator: /(?:\r?\n){2,}/, **
|
15
|
+
def parse(dataset, separator: /(?:\r?\n){2,}/, **opts)
|
16
16
|
case dataset
|
17
17
|
when Array
|
18
18
|
new(dataset.map { |seq|
|
@@ -23,12 +23,12 @@ module Wapiti
|
|
23
23
|
})
|
24
24
|
when String
|
25
25
|
new(dataset.split(separator).map { |seq|
|
26
|
-
Sequence.parse(seq, **
|
26
|
+
Sequence.parse(seq, **opts)
|
27
27
|
}.reject(&:empty?))
|
28
28
|
when REXML::Document
|
29
29
|
new(dataset.elements.to_a('dataset/sequence').map { |seq|
|
30
30
|
Sequence.new(seq.elements.to_a.map { |sgm|
|
31
|
-
sgm.text.strip.split(
|
31
|
+
sgm.text.strip.split(opts[:spacer] || /\s+/).map { |tk|
|
32
32
|
Token.new tk, label: sgm.name
|
33
33
|
}
|
34
34
|
}.flatten)
|
@@ -38,16 +38,16 @@ module Wapiti
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
def open(path, format: File.extname(path), **
|
41
|
+
def open(path, format: File.extname(path), **opts)
|
42
42
|
raise ArgumentError,
|
43
43
|
"cannot open dataset from tainted path: '#{path}'" if path.tainted?
|
44
44
|
|
45
45
|
input = File.read(path, encoding: 'utf-8')
|
46
46
|
case format.downcase
|
47
47
|
when '.xml', 'xml'
|
48
|
-
parse(REXML::Document.new(input), **
|
48
|
+
parse(REXML::Document.new(input), **opts)
|
49
49
|
else
|
50
|
-
parse(input, **
|
50
|
+
parse(input, **opts)
|
51
51
|
end
|
52
52
|
end
|
53
53
|
end
|
@@ -86,8 +86,8 @@ module Wapiti
|
|
86
86
|
self
|
87
87
|
end
|
88
88
|
|
89
|
-
def sample(n = 1, **
|
90
|
-
Dataset.new sequences.sample(n, **
|
89
|
+
def sample(n = 1, **opts)
|
90
|
+
Dataset.new sequences.sample(n, **opts)
|
91
91
|
end
|
92
92
|
|
93
93
|
def slice(start, length = 1)
|
@@ -114,20 +114,20 @@ module Wapiti
|
|
114
114
|
Dataset.new(sequences & other.sequences)
|
115
115
|
end
|
116
116
|
|
117
|
-
def to_s(separator: "\n\n", **
|
118
|
-
map { |sq| sq.to_s(**
|
117
|
+
def to_s(separator: "\n\n", **opts)
|
118
|
+
map { |sq| sq.to_s(**opts) }.join(separator)
|
119
119
|
end
|
120
120
|
|
121
|
-
def to_txt(separator: "\n", **
|
122
|
-
map { |sq| sq.to_sentence(**
|
121
|
+
def to_txt(separator: "\n", **opts)
|
122
|
+
map { |sq| sq.to_sentence(**opts) }.join(separator)
|
123
123
|
end
|
124
124
|
|
125
|
-
def to_a(**
|
126
|
-
map { |sq| sq.to_a(**
|
125
|
+
def to_a(**opts)
|
126
|
+
map { |sq| sq.to_a(**opts) }
|
127
127
|
end
|
128
128
|
|
129
|
-
def to_xml(**
|
130
|
-
xml = Builder::XmlMarkup.new(**
|
129
|
+
def to_xml(**opts)
|
130
|
+
xml = Builder::XmlMarkup.new(**opts)
|
131
131
|
xml.instruct!
|
132
132
|
xml.dataset do |ds|
|
133
133
|
each do |seq|
|
@@ -136,19 +136,19 @@ module Wapiti
|
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
139
|
-
def to_yml(**
|
140
|
-
map { |sq| sq.to_h(**
|
139
|
+
def to_yml(**opts)
|
140
|
+
map { |sq| sq.to_h(**opts) }
|
141
141
|
end
|
142
142
|
|
143
|
-
def save(path, format: File.extname(path), **
|
143
|
+
def save(path, format: File.extname(path), **opts)
|
144
144
|
raise ArgumentError,
|
145
145
|
"cannot write dataset to tainted path: '#{path}'" if path.tainted?
|
146
146
|
|
147
147
|
output = case format.downcase
|
148
148
|
when '.txt', 'txt'
|
149
|
-
to_s(**
|
149
|
+
to_s(**opts)
|
150
150
|
when '.xml', 'xml'
|
151
|
-
to_xml(**
|
151
|
+
to_xml(**opts)
|
152
152
|
else
|
153
153
|
raise ArgumentError, "unknown format: '#{format}'"
|
154
154
|
end
|
data/lib/wapiti/sequence.rb
CHANGED
@@ -11,9 +11,9 @@ module Wapiti
|
|
11
11
|
def_delegators :tokens, :[], :empty?, :size
|
12
12
|
|
13
13
|
class << self
|
14
|
-
def parse(string, delimiter: /\r?\n/, **
|
14
|
+
def parse(string, delimiter: /\r?\n/, **opts)
|
15
15
|
new(string.split(delimiter).map { |token|
|
16
|
-
Token.parse token, **
|
16
|
+
Token.parse token, **opts
|
17
17
|
}.reject(&:empty?))
|
18
18
|
end
|
19
19
|
end
|
@@ -82,20 +82,20 @@ module Wapiti
|
|
82
82
|
end
|
83
83
|
end
|
84
84
|
|
85
|
-
def to_a(**
|
86
|
-
tokens.map { |tk| tk.to_s(**
|
85
|
+
def to_a(**opts)
|
86
|
+
tokens.map { |tk| tk.to_s(**opts) }
|
87
87
|
end
|
88
88
|
|
89
|
-
def to_s(delimiter: "\n", **
|
90
|
-
tokens.map { |tk| tk.to_s(**
|
89
|
+
def to_s(delimiter: "\n", **opts)
|
90
|
+
tokens.map { |tk| tk.to_s(**opts) }.join(delimiter)
|
91
91
|
end
|
92
92
|
|
93
93
|
def to_sentence(delimiter: ' ')
|
94
94
|
to_s(delimiter: delimiter, expanded: false, tagged: false)
|
95
95
|
end
|
96
96
|
|
97
|
-
def to_h(symbolize_keys: false, **
|
98
|
-
each_segment(**
|
97
|
+
def to_h(symbolize_keys: false, **opts)
|
98
|
+
each_segment(**opts).reduce({}) do |h, (label, segment)|
|
99
99
|
label = label.intern if symbolize_keys
|
100
100
|
h[label] = [] unless h.key? label
|
101
101
|
h[label] << segment
|
data/lib/wapiti/token.rb
CHANGED
@@ -7,7 +7,7 @@ module Wapiti
|
|
7
7
|
attr_accessor :value, :label, :observations, :score
|
8
8
|
|
9
9
|
class << self
|
10
|
-
def parse(string, spacer: /\s+/, tagged: false)
|
10
|
+
def parse(string, spacer: /\s+/, tagged: false, **opts)
|
11
11
|
value, *observations = string.split(spacer)
|
12
12
|
new(value, {
|
13
13
|
label: (tagged ? observations.pop : nil).to_s,
|
@@ -56,8 +56,8 @@ module Wapiti
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
-
def to_s(spacer: ' ', **
|
60
|
-
to_a(**
|
59
|
+
def to_s(spacer: ' ', **opts)
|
60
|
+
to_a(**opts).join(spacer)
|
61
61
|
end
|
62
62
|
|
63
63
|
def to_a(expanded: true, tagged: true, encode: false)
|
data/lib/wapiti/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wapiti
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: builder
|