phonetics 1.9.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c81fa9abec7c3cdd12bf84e2cd01a7dba6c8f5b66068c0dde27b23f7d3c137f6
4
- data.tar.gz: 3d337a57a1eccadeb84a58ddd2e7cb0f9ea43b6ae505189376d4b97d457f5f71
3
+ metadata.gz: 32e8dd70cfccdfd6826023da278e2d04c75fc6e7658b5bc64af44830a0ae5668
4
+ data.tar.gz: 7dfd507a3fcb7914ee9eaba8049bc286068265399ee580a8c2ffb8ae8ecbd061
5
5
  SHA512:
6
- metadata.gz: 657e10eb57cef58a84080445215362b4cf3906aa7ac556535ff869d621ba8f76707a03a5430bfa31909874f0fb937ee10b64cb9218544eeba3824ba17069fb3d
7
- data.tar.gz: f3cbf639f3b915f4fd5832dc66a9b0c0254058ad1d4b12a6ac86b15e35cc41c9bfdd6162526a3c3c7f7bd26a6c9e4b270eceb4a918fd866c17b6c2828c07acce
6
+ metadata.gz: 10c9e15ec47748cf1c508538950fb090477e6762a584466bd9abcc7ffd81fff2497041adf77d190ba6adcb7ef73774502941faeff0edfcf048f7ac650336ab0a
7
+ data.tar.gz: 7cb06657f02ff1bad1547eb948b412876855d6e0967f927d44512e7d9b84b66e1d8c9ffb79f81e5001442d36bcbc37d8282aa80b3c1bbebecf25f2f0f3fb0849
@@ -1,16 +1,13 @@
1
1
  name: Ruby Gem
2
2
 
3
3
  on:
4
- pull_request:
5
- branches:
6
- - master
7
4
  push:
8
5
  branches:
9
6
  - master
10
7
 
11
8
  jobs:
12
9
  build:
13
- name: Build + Publish
10
+ name: gem publishing
14
11
  runs-on: ubuntu-latest
15
12
 
16
13
  steps:
@@ -18,7 +15,7 @@ jobs:
18
15
  - name: Set up Ruby 2.6
19
16
  uses: actions/setup-ruby@v1
20
17
  with:
21
- version: 2.6.x
18
+ ruby-version: 2.6.x
22
19
 
23
20
  - name: Publish to RubyGems
24
21
  run: |
@@ -26,7 +23,6 @@ jobs:
26
23
  touch $HOME/.gem/credentials
27
24
  chmod 0600 $HOME/.gem/credentials
28
25
  printf -- "---\n:rubygems_api_key: ${GEM_HOST_API_KEY}\n" > $HOME/.gem/credentials
29
- gem build *.gemspec
30
- gem push *.gem
26
+ bin/gempush-if-changed
31
27
  env:
32
28
  GEM_HOST_API_KEY: ${{secrets.RUBYGEMS_AUTH_TOKEN}}
@@ -0,0 +1,23 @@
1
+ name: Tests
2
+
3
+ on: push
4
+
5
+ jobs:
6
+ ci:
7
+ name: CI
8
+ runs-on: ubuntu-latest
9
+ strategy:
10
+ matrix:
11
+ ruby:
12
+ - '2.3.x'
13
+ - '2.4.x'
14
+ - '2.5.x'
15
+ - '2.6.x'
16
+ steps:
17
+ - uses: actions/checkout@master
18
+ - name: Setup ruby
19
+ uses: actions/setup-ruby@v1
20
+ with:
21
+ ruby-version: ${{ matrix.ruby }}
22
+ architecture: 'x64'
23
+ - run: gem install bundler && bundle && bundle exec rake
data/Gemfile CHANGED
@@ -2,7 +2,5 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- # gem 'ruby-prof-flamegraph', path: '/www/ruby-prof-flamegraph'
6
-
7
5
  # Specify your gem's dependencies in phonetics.gemspec
8
6
  gemspec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.9.0
1
+ 2.0.1
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+
3
+ set -x
4
+ if git diff --name-only HEAD..HEAD^ | egrep -q '^VERSION$'; then
5
+ # The VERSION file changed in the last commit, so build the gem and push it
6
+ gem build *.gemspec
7
+ gem push *.gem
8
+ fi
@@ -7,7 +7,13 @@
7
7
  #include "./next_phoneme_length.h"
8
8
  #include "./phonetic_cost.h"
9
9
 
10
+ // #define DEBUG
11
+
12
+ #ifdef DEBUG
10
13
  #define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__)
14
+ #else
15
+ #define debug(M, ...)
16
+ #endif
11
17
 
12
18
  VALUE Binding = Qnil;
13
19
 
@@ -15,8 +21,8 @@ VALUE Binding = Qnil;
15
21
 
16
22
  void Init_c_levenshtein();
17
23
 
18
- void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes, int string2_phoneme_count, int64_t *string2_phonemes, bool verbose);
19
- void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
24
+ void set_initial(float *d, int string1_phoneme_count, uint64_t *string1_phonemes, int string2_phoneme_count, uint64_t *string2_phonemes, bool verbose);
25
+ void print_matrix(float *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
20
26
  VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose);
21
27
 
22
28
  /* Function implementations */
@@ -41,11 +47,11 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
41
47
  int string1[string1_length + 1];
42
48
  int string2[string2_length + 1];
43
49
 
44
- double *d; // The (flattened) 2-dimensional matrix
50
+ float *d; // The (flattened) 2-dimensional matrix
45
51
  // underlying this algorithm
46
52
 
47
- double distance; // Return value of this function
48
- double min, delete, // Reusable cost calculations
53
+ float distance; // Return value of this function
54
+ float min, delete, // Reusable cost calculations
49
55
  insert, replace,
50
56
  cost;
51
57
  int i, j; // Frequently overwritten loop vars
@@ -64,34 +70,22 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
64
70
  }
65
71
 
66
72
  find_phonemes(string1, string1_length, &string1_phoneme_count, string1_phoneme_sizes);
67
- int64_t string1_phonemes[string1_phoneme_count];
73
+ uint64_t string1_phonemes[string1_phoneme_count];
74
+ set_phonemes(string1_phonemes, string1, string1_phoneme_count, string1_phoneme_sizes);
68
75
 
69
76
  find_phonemes(string2, string2_length, &string2_phoneme_count, string2_phoneme_sizes);
70
- int64_t string2_phonemes[string2_phoneme_count];
77
+ uint64_t string2_phonemes[string2_phoneme_count];
78
+ set_phonemes(string2_phonemes, string2, string2_phoneme_count, string2_phoneme_sizes);
71
79
 
72
80
  // Guard clauses for empty strings
73
81
  if (string1_phoneme_count == 0 && string2_phoneme_count == 0)
74
82
  return DBL2NUM(0.0);
75
83
 
76
- // Collect between 1 and 8 bytes of a phoneme into a single 64-bit word so we can compare two
77
- // phonemes using just one instruction.
78
- // These 64-bit words are how we implement the lookup table in phonetic_cost
79
- int idx = 0;
80
- for (i = 0; i < string1_phoneme_count; i++) {
81
- for (j = 0; j < string1_phoneme_sizes[j]; j++) {
82
- string1_phonemes[i] = (int) ( string1_phonemes[i] << 8 | string1[idx] );
83
- idx++;
84
- }
85
- }
86
- for (i = 0; i < string2_phoneme_count; i++) {
87
- for (j = 0; j < string2_phoneme_sizes[j]; j++) {
88
- string2_phonemes[i] = (int) ( string2_phonemes[i] << 8 | string2[idx] );
89
- idx++;
90
- }
91
- }
84
+ debug("\n");
85
+ debug("distance between 0 and 1 of phoneme1: %f\n", phonetic_cost(string1_phonemes[0], string1_phonemes[1]));
92
86
 
93
87
  // one-dimensional representation of 2 dimensional array
94
- d = calloc((string1_phoneme_count+1) * (string2_phoneme_count+1), sizeof(double));
88
+ d = calloc((string1_phoneme_count+1) * (string2_phoneme_count+1), sizeof(float));
95
89
 
96
90
  // First, set the top row and left column of the matrix using the sequential
97
91
  // phonetic edit distance of string1 and string2, respectively
@@ -115,34 +109,34 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
115
109
  // plus the phonetic distance between the sound we're moving from to the
116
110
  // new one.
117
111
 
118
- // debug("------- %d/%d (%d) \n", i, j, j*(string1_phoneme_count+1) + i);
112
+ debug("------- %d/%d (%d) \n", i, j, j*(string1_phoneme_count+1) + i);
119
113
 
120
114
  cost = phonetic_cost(string1_phonemes[i-1], string2_phonemes[j-1]);
121
115
 
122
116
  insert = d[j*(string1_phoneme_count+1) + i-1];
123
- // debug("insert proposes cell %d,%d - %f\n", i-1, j, insert);
117
+ debug("insert proposes cell %d,%d - %f\n", i-1, j, insert);
124
118
  min = insert;
125
- // debug("min (insert): %f\n", min);
119
+ debug("min (insert): %f\n", min);
126
120
 
127
121
  delete = d[(j-1)*(string1_phoneme_count+1) + i];
128
- // debug("delete proposes cell %d,%d - %f\n", i, j-1, delete);
122
+ debug("delete proposes cell %d,%d - %f\n", i, j-1, delete);
129
123
  if (delete < min) {
130
- // debug("delete is %f, better than %f for %d/%d\n", delete, min, i, j);
124
+ debug("delete is %f, better than %f for %d/%d\n", delete, min, i, j);
131
125
  min = delete;
132
126
  }
133
127
 
134
128
  replace = d[(j-1)*(string1_phoneme_count+1) + i-1];
135
- // debug("replace proposes cell %d,%d - %f\n", i-1, j-1, replace);
129
+ debug("replace proposes cell %d,%d - %f\n", i-1, j-1, replace);
136
130
  if (replace < min) {
137
- // debug("replace is %f, better than %f for %d/%d\n", replace, min, i, j);
131
+ debug("replace is %f, better than %f for %d/%d\n", replace, min, i, j);
138
132
  min = replace;
139
133
  }
140
134
 
141
135
  d[(j * (string1_phoneme_count+1)) + i] = min + cost;
142
- // debug("\n");
143
- // if (verbose) {
144
- // print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
145
- // }
136
+ debug("\n");
137
+ if (verbose) {
138
+ print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
139
+ }
146
140
 
147
141
  }
148
142
  }
@@ -166,9 +160,9 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
166
160
  // Subsequent values are the cumulative phonetic distance between each
167
161
  // phoneme within the same string.
168
162
  // "aek" -> [0.0, 1.0, 1.61, 2.61]
169
- void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes, int string2_phoneme_count, int64_t *string2_phonemes, bool verbose) {
163
+ void set_initial(float *d, int string1_phoneme_count, uint64_t *string1_phonemes, int string2_phoneme_count, uint64_t *string2_phonemes, bool verbose) {
170
164
 
171
- double initial_distance;
165
+ float initial_distance;
172
166
  int i, j;
173
167
 
174
168
  if (string1_phoneme_count == 0 || string2_phoneme_count == 0) {
@@ -178,7 +172,7 @@ void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes
178
172
  }
179
173
 
180
174
  // The top-left is 0, the cell to the right and down are each 1 to start
181
- d[0] = (double) 0.0;
175
+ d[0] = (float) 0.0;
182
176
  if (string1_phoneme_count > 0) {
183
177
  d[1] = initial_distance;
184
178
  }
@@ -186,16 +180,12 @@ void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes
186
180
  d[string1_phoneme_count+1] = initial_distance;
187
181
  }
188
182
 
189
- debug("string1 phoneme count: %d\n", string1_phoneme_count);
190
-
191
183
  for (i=2; i <= string1_phoneme_count; i++) {
192
184
  // The cost of adding the next phoneme is the cost so far plus the phonetic
193
185
  // distance between the previous one and the current one.
194
186
  d[i] = d[i-1] + phonetic_cost(string1_phonemes[i-2], string1_phonemes[i-1]);
195
187
  }
196
188
 
197
- debug("string2 phoneme count: %d\n", string2_phoneme_count);
198
-
199
189
  for (j=2; j <= string2_phoneme_count; j++) {
200
190
  // The same exact pattern down the left side of the matrix
201
191
  d[j * (string1_phoneme_count+1)] = d[(j - 1) * (string1_phoneme_count+1)] + phonetic_cost(string2_phonemes[j-2], string2_phonemes[j-1]);
@@ -203,13 +193,12 @@ void set_initial(double *d, int string1_phoneme_count, int64_t *string1_phonemes
203
193
  }
204
194
 
205
195
  // A handy visualization for developers
206
- void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
196
+ void print_matrix(float *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
207
197
 
208
198
  int i, j;
209
199
  int string1_offset = 0;
210
200
  int string2_offset = 0;
211
201
 
212
- return;
213
202
  if (!verbose)
214
203
  return;
215
204
 
@@ -1,4 +1,5 @@
1
- // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:158
1
+ // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:163
2
+ #include <stdio.h>
2
3
  int next_phoneme_length(int *string, int cursor, int length) {
3
4
 
4
5
  int max_length;
@@ -1,8 +1,13 @@
1
1
  #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <stdint.h>
2
4
  #include "./next_phoneme_length.h"
5
+
3
6
  void find_phonemes(int *string, int string_length, int *count, int *lengths) {
4
- int i = 0;
5
7
  int length;
8
+ int i;
9
+
10
+ i = 0;
6
11
  while (i < string_length) {
7
12
  length = next_phoneme_length(string, i, string_length);
8
13
  if (length) {
@@ -14,6 +19,21 @@ void find_phonemes(int *string, int string_length, int *count, int *lengths) {
14
19
  }
15
20
  }
16
21
 
22
+ // Collect between 1 and 8 bytes of a phoneme into a single 64-bit word so we can compare two
23
+ // phonemes using just one instruction.
24
+ // These 64-bit words are how we implement the lookup table in phonetic_cost
25
+ void set_phonemes(uint64_t* phonemes, int* string, int count, int* lengths) {
26
+ int idx = 0;
27
+ int i, j;
28
+ for (i = 0; i < count; i++) {
29
+ phonemes[i] = 0;
30
+ for (j = 0; j < lengths[i]; j++) {
31
+ phonemes[i] = (uint64_t) ( phonemes[i] << 8 | string[idx] );
32
+ idx++;
33
+ }
34
+ }
35
+ }
36
+
17
37
  void print_phoneme(int *string, int offset, int length, int padding) {
18
38
  int p;
19
39
  int max = padding;
@@ -1,2 +1,3 @@
1
1
  void find_phonemes(int *string, int string_length, int *count, int *lengths);
2
2
  void print_phoneme(int *string, int offset, int length, int padding);
3
+ void set_phonemes(uint64_t* phonemes, int* string, int count, int* lengths);
@@ -1,7 +1,12 @@
1
1
 
2
2
  // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:110
3
3
  #include <stdint.h>
4
+ #include <stdio.h>
5
+ #include <inttypes.h>
4
6
  float phonetic_cost(int64_t phoneme1, int64_t phoneme2) {
7
+ if (phoneme1 == phoneme2) {
8
+ return (float) 0.0;
9
+ }
5
10
 
6
11
  switch (phoneme1) {
7
12
  case 0b01101001:
@@ -95,7 +95,7 @@ module Phonetics
95
95
  # switch (phoneme1) {
96
96
  # case 'ɪ': // two bytes: [201, 170]
97
97
  # // vowel features: {"F1":300,"F2":2100,"rounded":false}
98
- #
98
+ #
99
99
  # switch(phoneme2) {
100
100
  # 'i': // one byte: [105]
101
101
  # // vowel features: {"F1":240,"F2":2400,"rounded":false}
@@ -109,7 +109,12 @@ module Phonetics
109
109
 
110
110
  // This is compiled from Ruby, in #{ruby_source}
111
111
  #include <stdint.h>
112
+ #include <stdio.h>
113
+ #include <inttypes.h>
112
114
  float phonetic_cost(int64_t phoneme1, int64_t phoneme2) {
115
+ if (phoneme1 == phoneme2) {
116
+ return (float) 0.0;
117
+ }
113
118
 
114
119
  HEADER
115
120
 
@@ -117,14 +122,14 @@ module Phonetics
117
122
  Phonetics.phonemes.each do |phoneme1|
118
123
  write " case #{binary(phoneme1)}:"
119
124
  describe(phoneme1, 2)
120
- write " switch(phoneme2) {"
125
+ write ' switch(phoneme2) {'
121
126
  Phonetics.distance_map[phoneme1].each do |phoneme2, distance|
122
127
  write " case #{binary(phoneme2)}:"
123
128
  describe(phoneme2, 6)
124
129
  write " return (float) #{distance};"
125
130
  write ' break;'
126
131
  end
127
- write " }"
132
+ write ' }'
128
133
  write ' break;'
129
134
  end
130
135
  write ' }'
@@ -156,6 +161,7 @@ module Phonetics
156
161
  def generate
157
162
  write(<<-HEADER.gsub(/^ {6}/, ''))
158
163
  // This is compiled from Ruby, in #{ruby_source}
164
+ #include <stdio.h>
159
165
  int next_phoneme_length(int *string, int cursor, int length) {
160
166
 
161
167
  int max_length;
@@ -26,5 +26,4 @@ Gem::Specification.new do |spec|
26
26
  spec.add_development_dependency 'rake-compiler'
27
27
  spec.add_development_dependency 'rspec'
28
28
  spec.add_development_dependency 'rubocop'
29
- spec.add_development_dependency 'ruby-prof'
30
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phonetics
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jack Danger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-30 00:00:00.000000000 Z
11
+ date: 2019-10-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: ruby-prof
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
97
  description: tools for linguistic code using the International Phonetic Alphabet
112
98
  email:
113
99
  - github@jackcanty.com
@@ -117,6 +103,7 @@ extensions:
117
103
  extra_rdoc_files: []
118
104
  files:
119
105
  - ".github/workflows/gempush.yml"
106
+ - ".github/workflows/test.yml"
120
107
  - ".gitignore"
121
108
  - ".rspec"
122
109
  - ".rubocop.yml"
@@ -133,6 +120,7 @@ files:
133
120
  - _site/phonetic_levenshtein_example.png
134
121
  - _site/vowel_chart_b_words.jpg
135
122
  - bin/console
123
+ - bin/gempush-if-changed
136
124
  - ext/c_levenshtein/extconf.rb
137
125
  - ext/c_levenshtein/levenshtein.c
138
126
  - ext/c_levenshtein/next_phoneme_length.c