wapiti 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eefd2c624bb02b635f41b9577d303abc352783d2530ef42f3db7f91db2384174
4
- data.tar.gz: 73c766d6e05599b5167743dfc53daf20f74db0af0fe22d3ea6a947e9882189ab
3
+ metadata.gz: d1b615815731e51a90b7043b7ad91c97f0386bf99435c78f18ab23cf9df212cc
4
+ data.tar.gz: ee11a7e6f76f382847f721f8388b9c7f314e46a90ab391b463aa7e60ef641d41
5
5
  SHA512:
6
- metadata.gz: 012c48b99ce4d6af1223f97fd03308f5af0c833e4db250842546b7f75de830fa700f71d52043fdd6c2b54f1f24cc83c0a0929211bf23153113d6efadf619fb68
7
- data.tar.gz: 1a78e6de9025f6f6e199ff70e0973a7aeb42beda218220f3c63801a8239cd5371bf4546796db82134646d4491fa9cf00072371cfe058a4632ab3946b621a32bd
6
+ metadata.gz: a7852d9566407e93c9d66a641dd8551e3d4666449b9aad01ef434f822c78955d887911e59adb0418751ec4183d1ffa32504ec525fe3b16dbf05018c650c143c2
7
+ data.tar.gz: '0762885a4ef9265917756d983932da613ebb2cf32c9f18db76afcdac933ae710fe457ba8b431bdf537b986c9c54f7a9908b605627a72e000dd59f6f9cebab034'
data/HISTORY.md CHANGED
@@ -1,6 +1,12 @@
1
+ 1.0.5 / 2019-05-13
2
+ ==================
3
+ * Updated examples in README.md to current API
4
+ * Fix crash when Model#label(input, check: true) is called on input with new labels
5
+
1
6
  1.0.3 / 2018-07-10
2
7
  ==================
3
8
  * Fix compile time format-security
9
+
4
10
  1.0.0 / 2017-12-xx
5
11
  ==================
6
12
  * Added support for Windows platform
data/README.md CHANGED
@@ -23,9 +23,12 @@ Quickstart
23
23
 
24
24
  ### Creating a Model
25
25
 
26
+ You can run the following examples by starting a Ruby interpreter (irb or pry) inside the spec/fixtures directory.
27
+
26
28
  Using a pattern and training data stored in a file:
27
29
 
28
- model = Wapiti.train('train.txt', pattern: 'pattern.txt')
30
+ require 'wapiti'
31
+ model = Wapiti.train('chtrain.txt', pattern: 'chpattern.txt')
29
32
  #=> #<Wapiti::Model:0x0000010188f868>
30
33
  model.labels
31
34
  #=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
@@ -34,11 +37,21 @@ Using a pattern and training data stored in a file:
34
37
 
35
38
  Alternatively, you can pass in the training data as a `Wapiti::Dataset`;
36
39
  this class supports the default text format used by Wapiti as well as
37
- additiional formats (such as YAML or XML) and an API to make it easier
40
+ additional formats (such as YAML or XML) and an API to make it easier
38
41
  to manage data sets used for input and training.
39
42
 
40
- data = Wapiti::Dataset.open('chtrain.xml')
41
- model = Wapiti.train(data, options)
43
+ options = {threads:3, pattern: 'chpattern.txt'}
44
+
45
+ data_text = Wapiti::Dataset.open('chtrain.txt',tagged:true)
46
+ model2= Wapiti.train(data_text,options)
47
+ model2.labels
48
+ #=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
49
+
50
+ options = {threads:3, pattern: 'chpattern_only_tag.txt'}
51
+
52
+ data_xml = Wapiti::Dataset.open('chtrain.xml')
53
+ #=> #<Wapiti::Dataset sequences={823}>
54
+ model3 = Wapiti.train(data_xml, options)
42
55
 
43
56
  You can consult the `Wapiti::Options.attribute_names` class for a list of
44
57
  supported configuration options and `Wapiti::Options.algorithms` for
@@ -64,9 +77,9 @@ Before saving your model you can use `compact` to reduce the model's size:
64
77
 
65
78
  By calling `#label` on a Model instance you can add labels to a dataset:
66
79
 
67
- model = Wapiti.load('m2.mod')
68
- input = Wapiti::Dataset.load('chtest.txt')
69
- output = model.label(input, tagged: true)
80
+ model = Wapiti.load('ch.mod')
81
+ input = Wapiti::Dataset.open('chtrain.txt',tagged:true)
82
+ output = model.label(input)
70
83
 
71
84
  The result is a new `Wapiti::Dataset` with the predicted labels for each
72
85
  token. If your input data was already tagged, you can compare the input
@@ -87,9 +100,11 @@ when calling `#label`), the score for each label will be appended to
87
100
  each token/label tuple as a floating point number or passed as a third
88
101
  argument to the passed-in block.
89
102
 
90
- model.label input, score: true
103
+ output_with_score = model.label input, score: true
91
104
  # => Dataset where each token will include a score
92
-
105
+ output_with_score.first.map(&:score)
106
+ # => [5.950832716249245, 8.870883529621942, ...]
107
+
93
108
  ### Statistics
94
109
 
95
110
  By setting the *:check* option you can tell Wapiti to keep statistics during
@@ -105,8 +120,8 @@ are also available through the associated attribute readers).
105
120
 
106
121
  model.label input, check: true
107
122
  model.stats
108
- => {:token=>{:count=>1896, :errors=>137, :rate=>7.225738396624472},
109
- :sequence=>{:count=>77, :errors=>50, :rate=>64.93506493506494}}
123
+ => {:token=>{:count=>19172, :errors=>36, :rate=>0.18777383684539956},
124
+ :sequence=>{:count=>823, :errors=>28, :rate=>3.402187120291616}}
110
125
 
111
126
  For convenience, you can also use the `#check` method, which
112
127
  will reset the counters, check your input, and return the stats.
@@ -911,7 +911,6 @@ static VALUE model_labels(VALUE self) {
911
911
  qrk_t *lp = model->reader->lbl;
912
912
 
913
913
  VALUE labels = rb_ary_new2(Y);
914
-
915
914
  for (unsigned int i = 0; i < Y; ++i) {
916
915
  rb_ary_push(labels, rb_str_new2(qrk_id2str(lp, i)));
917
916
  }
@@ -979,14 +978,21 @@ static VALUE decode_sequence(VALUE self, mdl_t *model, raw_t *raw) {
979
978
  }
980
979
 
981
980
  // Statistics
981
+
982
982
  if (model->opt->check) {
983
983
  int err = 0;
984
+ uint32_t lbl = 0;
984
985
 
985
986
  for (t = 0; t < T; ++t) {
986
- stat[0][seq->pos[t].lbl]++;
987
+ lbl = seq->pos[t].lbl;
988
+
989
+ // ((uint32_t)-1) is a magic value for no assigned token
990
+ if (lbl != ((uint32_t)-1)) {
991
+ stat[0][lbl]++;
992
+ }
987
993
  stat[1][out[t * N]]++;
988
994
 
989
- if (seq->pos[t].lbl != out[t * N]) {
995
+ if (lbl != out[t * N]) {
990
996
  terr++;
991
997
  err = 1;
992
998
  } else {
@@ -1,3 +1,3 @@
1
1
  module Wapiti
2
- VERSION = '1.0.4'.freeze
2
+ VERSION = '1.0.5'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wapiti
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-14 00:00:00.000000000 Z
11
+ date: 2019-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: builder