wapiti 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/wapiti/dataset.rb +21 -21
- data/lib/wapiti/sequence.rb +8 -8
- data/lib/wapiti/token.rb +3 -3
- data/lib/wapiti/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eefd2c624bb02b635f41b9577d303abc352783d2530ef42f3db7f91db2384174
|
4
|
+
data.tar.gz: 73c766d6e05599b5167743dfc53daf20f74db0af0fe22d3ea6a947e9882189ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 012c48b99ce4d6af1223f97fd03308f5af0c833e4db250842546b7f75de830fa700f71d52043fdd6c2b54f1f24cc83c0a0929211bf23153113d6efadf619fb68
|
7
|
+
data.tar.gz: 1a78e6de9025f6f6e199ff70e0973a7aeb42beda218220f3c63801a8239cd5371bf4546796db82134646d4491fa9cf00072371cfe058a4632ab3946b621a32bd
|
data/lib/wapiti/dataset.rb
CHANGED
@@ -12,7 +12,7 @@ module Wapiti
|
|
12
12
|
def_delegators :sequences, :[], :empty?, :length, :size, :slice!, :uniq!
|
13
13
|
|
14
14
|
class << self
|
15
|
-
def parse(dataset, separator: /(?:\r?\n){2,}/, **
|
15
|
+
def parse(dataset, separator: /(?:\r?\n){2,}/, **opts)
|
16
16
|
case dataset
|
17
17
|
when Array
|
18
18
|
new(dataset.map { |seq|
|
@@ -23,12 +23,12 @@ module Wapiti
|
|
23
23
|
})
|
24
24
|
when String
|
25
25
|
new(dataset.split(separator).map { |seq|
|
26
|
-
Sequence.parse(seq, **
|
26
|
+
Sequence.parse(seq, **opts)
|
27
27
|
}.reject(&:empty?))
|
28
28
|
when REXML::Document
|
29
29
|
new(dataset.elements.to_a('dataset/sequence').map { |seq|
|
30
30
|
Sequence.new(seq.elements.to_a.map { |sgm|
|
31
|
-
sgm.text.strip.split(
|
31
|
+
sgm.text.strip.split(opts[:spacer] || /\s+/).map { |tk|
|
32
32
|
Token.new tk, label: sgm.name
|
33
33
|
}
|
34
34
|
}.flatten)
|
@@ -38,16 +38,16 @@ module Wapiti
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
def open(path, format: File.extname(path), **
|
41
|
+
def open(path, format: File.extname(path), **opts)
|
42
42
|
raise ArgumentError,
|
43
43
|
"cannot open dataset from tainted path: '#{path}'" if path.tainted?
|
44
44
|
|
45
45
|
input = File.read(path, encoding: 'utf-8')
|
46
46
|
case format.downcase
|
47
47
|
when '.xml', 'xml'
|
48
|
-
parse(REXML::Document.new(input), **
|
48
|
+
parse(REXML::Document.new(input), **opts)
|
49
49
|
else
|
50
|
-
parse(input, **
|
50
|
+
parse(input, **opts)
|
51
51
|
end
|
52
52
|
end
|
53
53
|
end
|
@@ -86,8 +86,8 @@ module Wapiti
|
|
86
86
|
self
|
87
87
|
end
|
88
88
|
|
89
|
-
def sample(n = 1, **
|
90
|
-
Dataset.new sequences.sample(n, **
|
89
|
+
def sample(n = 1, **opts)
|
90
|
+
Dataset.new sequences.sample(n, **opts)
|
91
91
|
end
|
92
92
|
|
93
93
|
def slice(start, length = 1)
|
@@ -114,20 +114,20 @@ module Wapiti
|
|
114
114
|
Dataset.new(sequences & other.sequences)
|
115
115
|
end
|
116
116
|
|
117
|
-
def to_s(separator: "\n\n", **
|
118
|
-
map { |sq| sq.to_s(**
|
117
|
+
def to_s(separator: "\n\n", **opts)
|
118
|
+
map { |sq| sq.to_s(**opts) }.join(separator)
|
119
119
|
end
|
120
120
|
|
121
|
-
def to_txt(separator: "\n", **
|
122
|
-
map { |sq| sq.to_sentence(**
|
121
|
+
def to_txt(separator: "\n", **opts)
|
122
|
+
map { |sq| sq.to_sentence(**opts) }.join(separator)
|
123
123
|
end
|
124
124
|
|
125
|
-
def to_a(**
|
126
|
-
map { |sq| sq.to_a(**
|
125
|
+
def to_a(**opts)
|
126
|
+
map { |sq| sq.to_a(**opts) }
|
127
127
|
end
|
128
128
|
|
129
|
-
def to_xml(**
|
130
|
-
xml = Builder::XmlMarkup.new(**
|
129
|
+
def to_xml(**opts)
|
130
|
+
xml = Builder::XmlMarkup.new(**opts)
|
131
131
|
xml.instruct!
|
132
132
|
xml.dataset do |ds|
|
133
133
|
each do |seq|
|
@@ -136,19 +136,19 @@ module Wapiti
|
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
139
|
-
def to_yml(**
|
140
|
-
map { |sq| sq.to_h(**
|
139
|
+
def to_yml(**opts)
|
140
|
+
map { |sq| sq.to_h(**opts) }
|
141
141
|
end
|
142
142
|
|
143
|
-
def save(path, format: File.extname(path), **
|
143
|
+
def save(path, format: File.extname(path), **opts)
|
144
144
|
raise ArgumentError,
|
145
145
|
"cannot write dataset to tainted path: '#{path}'" if path.tainted?
|
146
146
|
|
147
147
|
output = case format.downcase
|
148
148
|
when '.txt', 'txt'
|
149
|
-
to_s(**
|
149
|
+
to_s(**opts)
|
150
150
|
when '.xml', 'xml'
|
151
|
-
to_xml(**
|
151
|
+
to_xml(**opts)
|
152
152
|
else
|
153
153
|
raise ArgumentError, "unknown format: '#{format}'"
|
154
154
|
end
|
data/lib/wapiti/sequence.rb
CHANGED
@@ -11,9 +11,9 @@ module Wapiti
|
|
11
11
|
def_delegators :tokens, :[], :empty?, :size
|
12
12
|
|
13
13
|
class << self
|
14
|
-
def parse(string, delimiter: /\r?\n/, **
|
14
|
+
def parse(string, delimiter: /\r?\n/, **opts)
|
15
15
|
new(string.split(delimiter).map { |token|
|
16
|
-
Token.parse token, **
|
16
|
+
Token.parse token, **opts
|
17
17
|
}.reject(&:empty?))
|
18
18
|
end
|
19
19
|
end
|
@@ -82,20 +82,20 @@ module Wapiti
|
|
82
82
|
end
|
83
83
|
end
|
84
84
|
|
85
|
-
def to_a(**
|
86
|
-
tokens.map { |tk| tk.to_s(**
|
85
|
+
def to_a(**opts)
|
86
|
+
tokens.map { |tk| tk.to_s(**opts) }
|
87
87
|
end
|
88
88
|
|
89
|
-
def to_s(delimiter: "\n", **
|
90
|
-
tokens.map { |tk| tk.to_s(**
|
89
|
+
def to_s(delimiter: "\n", **opts)
|
90
|
+
tokens.map { |tk| tk.to_s(**opts) }.join(delimiter)
|
91
91
|
end
|
92
92
|
|
93
93
|
def to_sentence(delimiter: ' ')
|
94
94
|
to_s(delimiter: delimiter, expanded: false, tagged: false)
|
95
95
|
end
|
96
96
|
|
97
|
-
def to_h(symbolize_keys: false, **
|
98
|
-
each_segment(**
|
97
|
+
def to_h(symbolize_keys: false, **opts)
|
98
|
+
each_segment(**opts).reduce({}) do |h, (label, segment)|
|
99
99
|
label = label.intern if symbolize_keys
|
100
100
|
h[label] = [] unless h.key? label
|
101
101
|
h[label] << segment
|
data/lib/wapiti/token.rb
CHANGED
@@ -7,7 +7,7 @@ module Wapiti
|
|
7
7
|
attr_accessor :value, :label, :observations, :score
|
8
8
|
|
9
9
|
class << self
|
10
|
-
def parse(string, spacer: /\s+/, tagged: false)
|
10
|
+
def parse(string, spacer: /\s+/, tagged: false, **opts)
|
11
11
|
value, *observations = string.split(spacer)
|
12
12
|
new(value, {
|
13
13
|
label: (tagged ? observations.pop : nil).to_s,
|
@@ -56,8 +56,8 @@ module Wapiti
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
-
def to_s(spacer: ' ', **
|
60
|
-
to_a(**
|
59
|
+
def to_s(spacer: ' ', **opts)
|
60
|
+
to_a(**opts).join(spacer)
|
61
61
|
end
|
62
62
|
|
63
63
|
def to_a(expanded: true, tagged: true, encode: false)
|
data/lib/wapiti/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wapiti
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: builder
|