phonetics 1.9.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c81fa9abec7c3cdd12bf84e2cd01a7dba6c8f5b66068c0dde27b23f7d3c137f6
4
- data.tar.gz: 3d337a57a1eccadeb84a58ddd2e7cb0f9ea43b6ae505189376d4b97d457f5f71
3
+ metadata.gz: 32e8dd70cfccdfd6826023da278e2d04c75fc6e7658b5bc64af44830a0ae5668
4
+ data.tar.gz: 7dfd507a3fcb7914ee9eaba8049bc286068265399ee580a8c2ffb8ae8ecbd061
5
5
  SHA512:
6
- metadata.gz: 657e10eb57cef58a84080445215362b4cf3906aa7ac556535ff869d621ba8f76707a03a5430bfa31909874f0fb937ee10b64cb9218544eeba3824ba17069fb3d
7
- data.tar.gz: f3cbf639f3b915f4fd5832dc66a9b0c0254058ad1d4b12a6ac86b15e35cc41c9bfdd6162526a3c3c7f7bd26a6c9e4b270eceb4a918fd866c17b6c2828c07acce
6
+ metadata.gz: 10c9e15ec47748cf1c508538950fb090477e6762a584466bd9abcc7ffd81fff2497041adf77d190ba6adcb7ef73774502941faeff0edfcf048f7ac650336ab0a
7
+ data.tar.gz: 7cb06657f02ff1bad1547eb948b412876855d6e0967f927d44512e7d9b84b66e1d8c9ffb79f81e5001442d36bcbc37d8282aa80b3c1bbebecf25f2f0f3fb0849
@@ -1,16 +1,13 @@
1
1
  name: Ruby Gem
2
2
 
3
3
  on:
4
- pull_request:
5
- branches:
6
- - master
7
4
  push:
8
5
  branches:
9
6
  - master
10
7
 
11
8
  jobs:
12
9
  build:
13
- name: Build + Publish
10
+ name: gem publishing
14
11
  runs-on: ubuntu-latest
15
12
 
16
13
  steps:
@@ -18,7 +15,7 @@ jobs:
18
15
  - name: Set up Ruby 2.6
19
16
  uses: actions/setup-ruby@v1
20
17
  with:
21
- version: 2.6.x
18
+ ruby-version: 2.6.x
22
19
 
23
20
  - name: Publish to RubyGems
24
21
  run: |
@@ -26,7 +23,6 @@ jobs:
26
23
  touch $HOME/.gem/credentials
27
24
  chmod 0600 $HOME/.gem/credentials
28
25
  printf -- "---\n:rubygems_api_key: ${GEM_HOST_API_KEY}\n" > $HOME/.gem/credentials
29
- gem build *.gemspec
30
- gem push *.gem
26
+ bin/gempush-if-changed
31
27
  env:
32
28
  GEM_HOST_API_KEY: ${{secrets.RUBYGEMS_AUTH_TOKEN}}
@@ -0,0 +1,23 @@
1
+ name: Tests
2
+
3
+ on: push
4
+
5
+ jobs:
6
+ ci:
7
+ name: CI
8
+ runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ ruby:
12
+ - '2.3.x'
13
+ - '2.4.x'
14
+ - '2.5.x'
15
+ - '2.6.x'
16
+ steps:
17
+ - uses: actions/checkout@master
18
+ - name: Setup ruby
19
+ uses: actions/setup-ruby@v1
20
+ with:
21
+ ruby-version: ${{ matrix.ruby }}
22
+ architecture: 'x64'
23
+ - run: gem install bundler && bundle && bundle exec rake
data/Gemfile CHANGED
@@ -2,7 +2,5 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- # gem 'ruby-prof-flamegraph', path: '/www/ruby-prof-flamegraph'
6
-
7
5
  # Specify your gem's dependencies in phonetics.gemspec
8
6
  gemspec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.9.0
1
+ 2.0.1
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+
3
+ set -x
4
+ if git diff --name-only HEAD..HEAD^ | egrep -q '^VERSION$'; then
5
+ # The VERSION file changed in the last commit, build the gem and push
6
+ gem build *.gemspec
7
+ gem push *.gem
8
+ fi
@@ -7,7 +7,13 @@
7
7
  #include "./next_phoneme_length.h"
8
8
  #include "./phonetic_cost.h"
9
9
 
10
+ // #define DEBUG
11
+
12
+ #ifdef DEBUG
10
13
  #define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__)
14
+ #else
15
+ #define debug(M, ...)
16
+ #endif
11
17
 
12
18
  VALUE Binding = Qnil;
13
19
 
@@ -15,8 +21,8 @@ VALUE Binding = Qnil;
15
21
 
16
22
  void Init_c_levenshtein();
17
23
 
18
- void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes, int string2_phoneme_count, int64_t *string2_phonemes, bool verbose);
19
- void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
24
+ void set_initial(float *d, int string1_phoneme_count, uint64_t *string1_phonemes, int string2_phoneme_count, uint64_t *string2_phonemes, bool verbose);
25
+ void print_matrix(float *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
20
26
  VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose);
21
27
 
22
28
  /* Function implementations */
@@ -41,11 +47,11 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
41
47
  int string1[string1_length + 1];
42
48
  int string2[string2_length + 1];
43
49
 
44
- double *d; // The (flattened) 2-dimensional matrix
50
+ float *d; // The (flattened) 2-dimensional matrix
45
51
  // underlying this algorithm
46
52
 
47
- double distance; // Return value of this function
48
- double min, delete, // Reusable cost calculations
53
+ float distance; // Return value of this function
54
+ float min, delete, // Reusable cost calculations
49
55
  insert, replace,
50
56
  cost;
51
57
  int i, j; // Frequently overwritten loop vars
@@ -64,34 +70,22 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
64
70
  }
65
71
 
66
72
  find_phonemes(string1, string1_length, &string1_phoneme_count, string1_phoneme_sizes);
67
- int64_t string1_phonemes[string1_phoneme_count];
73
+ uint64_t string1_phonemes[string1_phoneme_count];
74
+ set_phonemes(string1_phonemes, string1, string1_phoneme_count, string1_phoneme_sizes);
68
75
 
69
76
  find_phonemes(string2, string2_length, &string2_phoneme_count, string2_phoneme_sizes);
70
- int64_t string2_phonemes[string2_phoneme_count];
77
+ uint64_t string2_phonemes[string2_phoneme_count];
78
+ set_phonemes(string2_phonemes, string2, string2_phoneme_count, string2_phoneme_sizes);
71
79
 
72
80
  // Guard clauses for empty strings
73
81
  if (string1_phoneme_count == 0 && string2_phoneme_count == 0)
74
82
  return DBL2NUM(0.0);
75
83
 
76
- // Collect between 1 and 8 bytes of a phoneme into a single 64-bit word so we can compare two
77
- // phonemes using just one instruction.
78
- // These 64-bit words are how we implement the lookup table in phonetic_cost
79
- int idx = 0;
80
- for (i = 0; i < string1_phoneme_count; i++) {
81
- for (j = 0; j < string1_phoneme_sizes[j]; j++) {
82
- string1_phonemes[i] = (int) ( string1_phonemes[i] << 8 | string1[idx] );
83
- idx++;
84
- }
85
- }
86
- for (i = 0; i < string2_phoneme_count; i++) {
87
- for (j = 0; j < string2_phoneme_sizes[j]; j++) {
88
- string2_phonemes[i] = (int) ( string2_phonemes[i] << 8 | string2[idx] );
89
- idx++;
90
- }
91
- }
84
+ debug("\n");
85
+ debug("distance between 0 and 1 of phoneme1: %f\n", phonetic_cost(string1_phonemes[0], string1_phonemes[1]));
92
86
 
93
87
  // one-dimensional representation of 2 dimensional array
94
- d = calloc((string1_phoneme_count+1) * (string2_phoneme_count+1), sizeof(double));
88
+ d = calloc((string1_phoneme_count+1) * (string2_phoneme_count+1), sizeof(float));
95
89
 
96
90
  // First, set the top row and left column of the matrix using the sequential
97
91
  // phonetic edit distance of string1 and string2, respectively
@@ -115,34 +109,34 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
115
109
  // plus the phonetic distance between the sound we're moving from to the
116
110
  // new one.
117
111
 
118
- // debug("------- %d/%d (%d) \n", i, j, j*(string1_phoneme_count+1) + i);
112
+ debug("------- %d/%d (%d) \n", i, j, j*(string1_phoneme_count+1) + i);
119
113
 
120
114
  cost = phonetic_cost(string1_phonemes[i-1], string2_phonemes[j-1]);
121
115
 
122
116
  insert = d[j*(string1_phoneme_count+1) + i-1];
123
- // debug("insert proposes cell %d,%d - %f\n", i-1, j, insert);
117
+ debug("insert proposes cell %d,%d - %f\n", i-1, j, insert);
124
118
  min = insert;
125
- // debug("min (insert): %f\n", min);
119
+ debug("min (insert): %f\n", min);
126
120
 
127
121
  delete = d[(j-1)*(string1_phoneme_count+1) + i];
128
- // debug("delete proposes cell %d,%d - %f\n", i, j-1, delete);
122
+ debug("delete proposes cell %d,%d - %f\n", i, j-1, delete);
129
123
  if (delete < min) {
130
- // debug("delete is %f, better than %f for %d/%d\n", delete, min, i, j);
124
+ debug("delete is %f, better than %f for %d/%d\n", delete, min, i, j);
131
125
  min = delete;
132
126
  }
133
127
 
134
128
  replace = d[(j-1)*(string1_phoneme_count+1) + i-1];
135
- // debug("replace proposes cell %d,%d - %f\n", i-1, j-1, replace);
129
+ debug("replace proposes cell %d,%d - %f\n", i-1, j-1, replace);
136
130
  if (replace < min) {
137
- // debug("replace is %f, better than %f for %d/%d\n", replace, min, i, j);
131
+ debug("replace is %f, better than %f for %d/%d\n", replace, min, i, j);
138
132
  min = replace;
139
133
  }
140
134
 
141
135
  d[(j * (string1_phoneme_count+1)) + i] = min + cost;
142
- // debug("\n");
143
- // if (verbose) {
144
- // print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
145
- // }
136
+ debug("\n");
137
+ if (verbose) {
138
+ print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
139
+ }
146
140
 
147
141
  }
148
142
  }
@@ -166,9 +160,9 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
166
160
  // Subsequent values are the cumulative phonetic distance between each
167
161
  // phoneme within the same string.
168
162
  // "aek" -> [0.0, 1.0, 1.61, 2.61]
169
- void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes, int string2_phoneme_count, int64_t *string2_phonemes, bool verbose) {
163
+ void set_initial(float *d, int string1_phoneme_count, uint64_t *string1_phonemes, int string2_phoneme_count, uint64_t *string2_phonemes, bool verbose) {
170
164
 
171
- double initial_distance;
165
+ float initial_distance;
172
166
  int i, j;
173
167
 
174
168
  if (string1_phoneme_count == 0 || string2_phoneme_count == 0) {
@@ -178,7 +172,7 @@ void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes
178
172
  }
179
173
 
180
174
  // The top-left is 0, the cell to the right and down are each 1 to start
181
- d[0] = (double) 0.0;
175
+ d[0] = (float) 0.0;
182
176
  if (string1_phoneme_count > 0) {
183
177
  d[1] = initial_distance;
184
178
  }
@@ -186,16 +180,12 @@ void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes
186
180
  d[string1_phoneme_count+1] = initial_distance;
187
181
  }
188
182
 
189
- debug("string1 phoneme count: %d\n", string1_phoneme_count);
190
-
191
183
  for (i=2; i <= string1_phoneme_count; i++) {
192
184
  // The cost of adding the next phoneme is the cost so far plus the phonetic
193
185
  // distance between the previous one and the current one.
194
186
  d[i] = d[i-1] + phonetic_cost(string1_phonemes[i-2], string1_phonemes[i-1]);
195
187
  }
196
188
 
197
- debug("string2 phoneme count: %d\n", string2_phoneme_count);
198
-
199
189
  for (j=2; j <= string2_phoneme_count; j++) {
200
190
  // The same exact pattern down the left side of the matrix
201
191
  d[j * (string1_phoneme_count+1)] = d[(j - 1) * (string1_phoneme_count+1)] + phonetic_cost(string2_phonemes[j-2], string2_phonemes[j-1]);
@@ -203,13 +193,12 @@ void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes
203
193
  }
204
194
 
205
195
  // A handy visualization for developers
206
- void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
196
+ void print_matrix(float *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
207
197
 
208
198
  int i, j;
209
199
  int string1_offset = 0;
210
200
  int string2_offset = 0;
211
201
 
212
- return;
213
202
  if (!verbose)
214
203
  return;
215
204
 
@@ -1,4 +1,5 @@
1
- // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:158
1
+ // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:163
2
+ #include <stdio.h>
2
3
  int next_phoneme_length(int *string, int cursor, int length) {
3
4
 
4
5
  int max_length;
@@ -1,8 +1,13 @@
1
1
  #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <stdint.h>
2
4
  #include "./next_phoneme_length.h"
5
+
3
6
  void find_phonemes(int *string, int string_length, int *count, int *lengths) {
4
- int i = 0;
5
7
  int length;
8
+ int i;
9
+
10
+ i = 0;
6
11
  while (i < string_length) {
7
12
  length = next_phoneme_length(string, i, string_length);
8
13
  if (length) {
@@ -14,6 +19,21 @@ void find_phonemes(int *string, int string_length, int *count, int *lengths) {
14
19
  }
15
20
  }
16
21
 
22
+ // Collect between 1 and 8 bytes of a phoneme into a single 64-bit word so we can compare two
23
+ // phonemes using just one instruction.
24
+ // These 64-bit words are how we implement the lookup table in phonetic_cost
25
+ void set_phonemes(uint64_t* phonemes, int* string, int count, int* lengths) {
26
+ int idx = 0;
27
+ int i, j;
28
+ for (i = 0; i < count; i++) {
29
+ phonemes[i] = 0;
30
+ for (j = 0; j < lengths[i]; j++) {
31
+ phonemes[i] = (uint64_t) ( phonemes[i] << 8 | string[idx] );
32
+ idx++;
33
+ }
34
+ }
35
+ }
36
+
17
37
  void print_phoneme(int *string, int offset, int length, int padding) {
18
38
  int p;
19
39
  int max = padding;
@@ -1,2 +1,3 @@
1
1
  void find_phonemes(int *string, int string_length, int *count, int *lengths);
2
2
  void print_phoneme(int *string, int offset, int length, int padding);
3
+ void set_phonemes(uint64_t* phonemes, int* string, int count, int* lengths);
@@ -1,7 +1,12 @@
1
1
 
2
2
  // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:110
3
3
  #include <stdint.h>
4
+ #include <stdio.h>
5
+ #include <inttypes.h>
4
6
  float phonetic_cost(int64_t phoneme1, int64_t phoneme2) {
7
+ if (phoneme1 == phoneme2) {
8
+ return (float) 0.0;
9
+ }
5
10
 
6
11
  switch (phoneme1) {
7
12
  case 0b01101001:
@@ -95,7 +95,7 @@ module Phonetics
95
95
  # switch (phoneme1) {
96
96
  # case 'ɪ': // two bytes: [201, 170]
97
97
  # // vowel features: {"F1":300,"F2":2100,"rounded":false}
98
- #
98
+ #
99
99
  # switch(phoneme2) {
100
100
  # 'i': // one byte: [105]
101
101
  # // vowel features: {"F1":240,"F2":2400,"rounded":false}
@@ -109,7 +109,12 @@ module Phonetics
109
109
 
110
110
  // This is compiled from Ruby, in #{ruby_source}
111
111
  #include <stdint.h>
112
+ #include <stdio.h>
113
+ #include <inttypes.h>
112
114
  float phonetic_cost(int64_t phoneme1, int64_t phoneme2) {
115
+ if (phoneme1 == phoneme2) {
116
+ return (float) 0.0;
117
+ }
113
118
 
114
119
  HEADER
115
120
 
@@ -117,14 +122,14 @@ module Phonetics
117
122
  Phonetics.phonemes.each do |phoneme1|
118
123
  write " case #{binary(phoneme1)}:"
119
124
  describe(phoneme1, 2)
120
- write " switch(phoneme2) {"
125
+ write ' switch(phoneme2) {'
121
126
  Phonetics.distance_map[phoneme1].each do |phoneme2, distance|
122
127
  write " case #{binary(phoneme2)}:"
123
128
  describe(phoneme2, 6)
124
129
  write " return (float) #{distance};"
125
130
  write ' break;'
126
131
  end
127
- write " }"
132
+ write ' }'
128
133
  write ' break;'
129
134
  end
130
135
  write ' }'
@@ -156,6 +161,7 @@ module Phonetics
156
161
  def generate
157
162
  write(<<-HEADER.gsub(/^ {6}/, ''))
158
163
  // This is compiled from Ruby, in #{ruby_source}
164
+ #include <stdio.h>
159
165
  int next_phoneme_length(int *string, int cursor, int length) {
160
166
 
161
167
  int max_length;
@@ -26,5 +26,4 @@ Gem::Specification.new do |spec|
26
26
  spec.add_development_dependency 'rake-compiler'
27
27
  spec.add_development_dependency 'rspec'
28
28
  spec.add_development_dependency 'rubocop'
29
- spec.add_development_dependency 'ruby-prof'
30
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phonetics
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jack Danger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-30 00:00:00.000000000 Z
11
+ date: 2019-10-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: ruby-prof
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
97
  description: tools for linguistic code using the International Phonetic Alphabet
112
98
  email:
113
99
  - github@jackcanty.com
@@ -117,6 +103,7 @@ extensions:
117
103
  extra_rdoc_files: []
118
104
  files:
119
105
  - ".github/workflows/gempush.yml"
106
+ - ".github/workflows/test.yml"
120
107
  - ".gitignore"
121
108
  - ".rspec"
122
109
  - ".rubocop.yml"
@@ -133,6 +120,7 @@ files:
133
120
  - _site/phonetic_levenshtein_example.png
134
121
  - _site/vowel_chart_b_words.jpg
135
122
  - bin/console
123
+ - bin/gempush-if-changed
136
124
  - ext/c_levenshtein/extconf.rb
137
125
  - ext/c_levenshtein/levenshtein.c
138
126
  - ext/c_levenshtein/next_phoneme_length.c