wapiti 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +6 -0
- data/README.md +26 -11
- data/ext/wapiti/native.c +9 -3
- data/lib/wapiti/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1b615815731e51a90b7043b7ad91c97f0386bf99435c78f18ab23cf9df212cc
|
4
|
+
data.tar.gz: ee11a7e6f76f382847f721f8388b9c7f314e46a90ab391b463aa7e60ef641d41
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7852d9566407e93c9d66a641dd8551e3d4666449b9aad01ef434f822c78955d887911e59adb0418751ec4183d1ffa32504ec525fe3b16dbf05018c650c143c2
|
7
|
+
data.tar.gz: '0762885a4ef9265917756d983932da613ebb2cf32c9f18db76afcdac933ae710fe457ba8b431bdf537b986c9c54f7a9908b605627a72e000dd59f6f9cebab034'
|
data/HISTORY.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
|
+
1.0.5 / 2019-05-13
|
2
|
+
==================
|
3
|
+
* Updated examples in README.md to current API
|
4
|
+
* Fix crash when Model#label(input, check: true) is called on input with new labels
|
5
|
+
|
1
6
|
1.0.3 / 2018-07-10
|
2
7
|
==================
|
3
8
|
* Fix compile time format-security
|
9
|
+
|
4
10
|
1.0.0 / 2017-12-xx
|
5
11
|
==================
|
6
12
|
* Added support for Windows platform
|
data/README.md
CHANGED
@@ -23,9 +23,12 @@ Quickstart
|
|
23
23
|
|
24
24
|
### Creating a Model
|
25
25
|
|
26
|
+
You can run the following examples by starting the Ruby interpreter (irb or pry) inside the spec/fixtures directory.
|
27
|
+
|
26
28
|
Using a pattern and training data stored in a file:
|
27
29
|
|
28
|
-
|
30
|
+
require 'wapiti'
|
31
|
+
model = Wapiti.train('chtrain.txt', pattern: 'chpattern.txt')
|
29
32
|
#=> #<Wapiti::Model:0x0000010188f868>
|
30
33
|
model.labels
|
31
34
|
#=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
|
@@ -34,11 +37,21 @@ Using a pattern and training data stored in a file:
|
|
34
37
|
|
35
38
|
Alternatively, you can pass in the training data as a `Wapiti::Dataset`;
|
36
39
|
this class supports the default text format used by Wapiti as well as
|
37
|
-
|
40
|
+
additional formats (such as YAML or XML) and an API, to make it easier
|
38
41
|
to manage data sets used for input and training.
|
39
42
|
|
40
|
-
|
41
|
-
|
43
|
+
options = {threads:3, pattern: 'chpattern.txt'}
|
44
|
+
|
45
|
+
data_text = Wapiti::Dataset.open('chtrain.txt',tagged:true)
|
46
|
+
model2= Wapiti.train(data_text,options)
|
47
|
+
model2.labels
|
48
|
+
=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
|
49
|
+
|
50
|
+
options = {threads:3, pattern: 'chpattern_only_tag.txt'}
|
51
|
+
|
52
|
+
data_xml = Wapiti::Dataset.open('chtrain.xml')
|
53
|
+
#=> #<Wapiti::Dataset sequences={823}>
|
54
|
+
model3 = Wapiti.train(data_xml, options)
|
42
55
|
|
43
56
|
You can consult the `Wapiti::Options.attribute_names` class for a list of
|
44
57
|
supported configuration options and `Wapiti::Options.algorithms` for
|
@@ -64,9 +77,9 @@ Before saving your model you can use `compact` to reduce the model's size:
|
|
64
77
|
|
65
78
|
By calling `#label` on a Model instance you can add labels to a dataset:
|
66
79
|
|
67
|
-
model = Wapiti.load('
|
68
|
-
input = Wapiti::Dataset.
|
69
|
-
output = model.label(input
|
80
|
+
model = Wapiti.load('ch.mod')
|
81
|
+
input = Wapiti::Dataset.open('chtrain.txt',tagged:true)
|
82
|
+
output = model.label(input)
|
70
83
|
|
71
84
|
The result is a new `Wapiti::Dataset` with the predicted labels for each
|
72
85
|
token. If your input data was already tagged, you can compare the input
|
@@ -87,9 +100,11 @@ when calling `#label`), the score for each label will be appended to
|
|
87
100
|
each token/label tuple as a floating point number or passed as a third
|
88
101
|
argument to the passed-in block.
|
89
102
|
|
90
|
-
model.label input, score: true
|
103
|
+
output_with_score = model.label input, score: true
|
91
104
|
# => Dataset where each token will include a score
|
92
|
-
|
105
|
+
output_with_score.first.map(&:score)
|
106
|
+
# => [5.950832716249245, 8.870883529621942, ...]
|
107
|
+
|
93
108
|
### Statistics
|
94
109
|
|
95
110
|
By setting the *:check* option you can tell Wapiti to keep statistics during
|
@@ -105,8 +120,8 @@ are also available through the associated attribute readers).
|
|
105
120
|
|
106
121
|
model.label input, check: true
|
107
122
|
model.stats
|
108
|
-
=> {:token=>{:count=>
|
109
|
-
|
123
|
+
=> {:token=>{:count=>19172, :errors=>36, :rate=>0.18777383684539956},
|
124
|
+
:sequence=>{:count=>823, :errors=>28, :rate=>3.402187120291616}}
|
110
125
|
|
111
126
|
For convenience, you can also use the `#check` method, which
|
112
127
|
will reset the counters, check your input, and return the stats.
|
data/ext/wapiti/native.c
CHANGED
@@ -911,7 +911,6 @@ static VALUE model_labels(VALUE self) {
|
|
911
911
|
qrk_t *lp = model->reader->lbl;
|
912
912
|
|
913
913
|
VALUE labels = rb_ary_new2(Y);
|
914
|
-
|
915
914
|
for (unsigned int i = 0; i < Y; ++i) {
|
916
915
|
rb_ary_push(labels, rb_str_new2(qrk_id2str(lp, i)));
|
917
916
|
}
|
@@ -979,14 +978,21 @@ static VALUE decode_sequence(VALUE self, mdl_t *model, raw_t *raw) {
|
|
979
978
|
}
|
980
979
|
|
981
980
|
// Statistics
|
981
|
+
|
982
982
|
if (model->opt->check) {
|
983
983
|
int err = 0;
|
984
|
+
uint32_t lbl = 0;
|
984
985
|
|
985
986
|
for (t = 0; t < T; ++t) {
|
986
|
-
|
987
|
+
lbl = seq->pos[t].lbl;
|
988
|
+
|
989
|
+
// ((uint32_t)-1) is a magic value for no assigned token
|
990
|
+
if (lbl != ((uint32_t)-1)) {
|
991
|
+
stat[0][lbl]++;
|
992
|
+
}
|
987
993
|
stat[1][out[t * N]]++;
|
988
994
|
|
989
|
-
if (
|
995
|
+
if (lbl != out[t * N]) {
|
990
996
|
terr++;
|
991
997
|
err = 1;
|
992
998
|
} else {
|
data/lib/wapiti/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wapiti
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: builder
|