mini-levenshtein 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,146 @@
1
+ /*
2
+ * This file has been altered to better fit fuzzywuzzy.
3
+ * To see all changes done, please diff this file with
4
+ * <https://github.com/Tmplt/python-Levenshtein/blob/master/Levenshtein.c>
5
+ *
6
+ * Summary:
7
+ * - stripped all python-related code and data types;
8
+ */
9
+
10
+ /* @(#) $Id: Levenshtein.h,v 1.22 2005/01/13 20:02:56 yeti Exp $ */
11
+ #ifndef LEVENSHTEIN_H
12
+ #define LEVENSHTEIN_H
13
+
14
+ #ifndef size_t
15
+ #include <stdlib.h>
16
+ #endif
17
+
18
+ /* A bit dirty. */
19
+ #ifndef _LEV_STATIC_PY
20
+ #define _LEV_STATIC_PY /* */
21
+ #endif
22
+
23
+ /* In C, this is just wchar_t and unsigned char, in Python, lev_wchar can
24
+ * be anything. If you really want to cheat, define wchar_t to any integer
25
+ * type you like before including Levenshtein.h and recompile it. */
26
+ #ifndef lev_wchar
27
+ #ifndef wchar_t
28
+ #include <wchar.h>
29
+ #endif
30
+ #define lev_wchar wchar_t
31
+ #endif
32
+ typedef char lev_byte;
33
+
34
+ /* Edit operation type
35
+ * DON'T CHANGE! Used as array indices, and the bits are occasionally used
36
+ * as flags */
37
+ typedef enum
38
+ {
39
+ LEV_EDIT_KEEP = 0,
40
+ LEV_EDIT_REPLACE = 1,
41
+ LEV_EDIT_INSERT = 2,
42
+ LEV_EDIT_DELETE = 3,
43
+ LEV_EDIT_LAST /* sometimes returned when an error occurs */
44
+ } LevEditType;
45
+
46
+ /* Error codes returned by editop check functions */
47
+ typedef enum
48
+ {
49
+ LEV_EDIT_ERR_OK = 0,
50
+ LEV_EDIT_ERR_TYPE, /* nonexistent edit type */
51
+ LEV_EDIT_ERR_OUT, /* edit out of string bounds */
52
+ LEV_EDIT_ERR_ORDER, /* ops are not ordered */
53
+ LEV_EDIT_ERR_BLOCK, /* inconsistent block boundaries (block ops) */
54
+ LEV_EDIT_ERR_SPAN, /* sequence is not a full transformation (block ops) */
55
+ LEV_EDIT_ERR_LAST
56
+ } LevEditOpError;
57
+
58
+ /* String averaging method (not used yet) */
59
+ typedef enum
60
+ {
61
+ LEV_AVG_HEAD = 0, /* take operations from the head */
62
+ LEV_AVG_TAIL, /* take operations from the tail */
63
+ LEV_AVG_SPREAD, /* take an equidistantly distributed subset */
64
+ LEV_AVG_BLOCK, /* take a random continuous block */
65
+ LEV_AVG_RANDOM, /* take a random subset */
66
+ LEV_AVG_LAST /* one past the last method; equals the number of methods above */
67
+ } LevAveragingType;
68
+
69
+ /* Edit operation (atomic).
70
+ * This is the `native' atomic edit operation. It differs from the difflib
71
+ * one's because it represents a change of one character, not a block. And
72
+ * we usually don't care about LEV_EDIT_KEEP, though the functions can handle
73
+ * them. The positions are interpreted as at the left edge of a character.
74
+ */
75
+ typedef struct
76
+ {
77
+ LevEditType type; /* editing operation type */
78
+ size_t spos; /* source block position */
79
+ size_t dpos; /* destination position */
80
+ } LevEditOp;
81
+
82
+ /* Edit operation (difflib-compatible).
83
+ * This is not `native', but conversion functions exist. These fields exactly
84
+ * correspond to the opcodes() tuple fields (and this method is also the
85
+ * source of the silly OpCode name). Sequences must span over complete
86
+ * strings, subsequences are simply edit sequences with more (or larger)
87
+ * LEV_EDIT_KEEP blocks.
88
+ */
89
+ typedef struct
90
+ {
91
+ LevEditType type; /* editing operation type */
92
+ size_t sbeg, send; /* source block begin, end */
93
+ size_t dbeg, dend; /* destination block begin, end */
94
+ } LevOpCode;
95
+
96
+ /* Matching block (difflib-compatible). */
97
+ typedef struct
98
+ {
99
+ size_t spos; /* presumably the block's start in the source string (cf. LevEditOp.spos) -- TODO confirm */
100
+ size_t dpos; /* presumably the block's start in the destination string (cf. LevEditOp.dpos) -- TODO confirm */
101
+ size_t len; /* presumably the number of matching characters in the block -- TODO confirm */
102
+ } LevMatchingBlock;
103
+
104
+ size_t
105
+ lev_edit_distance(size_t len1,
106
+ const lev_byte *string1,
107
+ size_t len2,
108
+ const lev_byte *string2,
109
+ int xcost);
110
+
111
+ size_t
112
+ lev_u_edit_distance(size_t len1,
113
+ const lev_wchar *string1,
114
+ size_t len2,
115
+ const lev_wchar *string2,
116
+ int xcost);
117
+
118
+ LevEditOp *
119
+ lev_editops_find(size_t len1,
120
+ const lev_byte *string1,
121
+ size_t len2,
122
+ const lev_byte *string2,
123
+ size_t *n);
124
+
125
+ LevOpCode *
126
+ lev_editops_to_opcodes(size_t n,
127
+ const LevEditOp *ops,
128
+ size_t *nb,
129
+ size_t len1,
130
+ size_t len2);
131
+
132
+ LevMatchingBlock *
133
+ lev_opcodes_matching_blocks(size_t len1,
134
+ __attribute__((unused)) size_t len2,
135
+ size_t nb,
136
+ const LevOpCode *bops,
137
+ size_t *nmblocks);
138
+
139
+ LevMatchingBlock *
140
+ lev_editops_matching_blocks(size_t len1,
141
+ size_t len2,
142
+ size_t n,
143
+ const LevEditOp *ops,
144
+ size_t *nmblocks);
145
+
146
+ #endif
@@ -0,0 +1,32 @@
1
+ #include <string.h>
2
+ #include <math.h>
3
+ #include <wchar.h>
4
+ #include <stdlib.h>
5
+
6
+ #include "ruby.h"
7
+ #include "levenshtein.h"
8
+
9
+ VALUE MiniLevenshteinInternal = Qnil;
10
+
11
+ VALUE method_internal_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost);
12
+
13
+ void Init_mini_levenshtein()
14
+ {
15
+ MiniLevenshteinInternal = rb_define_module("MiniLevenshteinInternal");
16
+ rb_define_method(MiniLevenshteinInternal, "internal_distance", method_internal_distance, 3);
17
+ }
18
+
19
+ VALUE method_internal_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost)
20
+ {
21
+ size_t len1 = RSTRING_LEN(s1);
22
+ size_t len2 = RSTRING_LEN(s2);
23
+
24
+ const lev_byte *str1 = StringValuePtr(s1);
25
+ const lev_byte *str2 = StringValuePtr(s2);
26
+
27
+ int cost = FIX2INT(xcost);
28
+
29
+ long distance = lev_edit_distance(len1, str1, len2, str2, cost);
30
+
31
+ return INT2NUM(distance);
32
+ }
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MiniLevenshtein
4
+ VERSION = '0.1.0' # gem release number; the gemspec reads it as MiniLevenshtein::VERSION
5
+ end
@@ -0,0 +1,33 @@
1
+ require 'mini-levenshtein/mini_levenshtein'
2
+
3
# Levenshtein distance and similarity ratio for Ruby strings.
#
# The actual computation is done by +internal_distance+, a native method
# mixed in from MiniLevenshteinInternal. It operates on the raw bytes of the
# strings, so all results are byte-based.
module MiniLevenshtein
  extend MiniLevenshteinInternal

  class << self
    # Edit distance between two strings.
    #
    # Calls the native routine with xcost = 0.
    # NOTE(review): in upstream python-Levenshtein a zero xcost means a
    # substitution costs 1 -- confirm against the bundled levenshtein.c.
    #
    # @param string1 [String]
    # @param string2 [String]
    # @return [Integer]
    # @raise [TypeError] if either argument is not a String
    def distance(string1, string2)
      validate_string!(string1)
      validate_string!(string2)

      internal_distance(string1, string2, 0)
    end

    # Similarity of two strings as (lensum - distance) / lensum, where
    # distance is computed with xcost = 1.
    #
    # Two empty strings, or any pair with distance 0, yield 1.0.
    #
    # @param string1 [String]
    # @param string2 [String]
    # @return [Float]
    # @raise [TypeError] if either argument is not a String
    def ratio(string1, string2)
      validate_string!(string1)
      validate_string!(string2)

      # The native distance counts bytes, so the lengths must be in bytes as
      # well; character counts under-measure multibyte strings and could push
      # the ratio below zero.
      lensum = string1.bytesize + string2.bytesize
      return 1.0 if lensum.zero?

      distance = internal_distance(string1, string2, 1)
      return 1.0 if distance.zero?

      (lensum - distance) / lensum.to_f
    end

    private

    # Rejects anything that is not a String before it reaches native code.
    def validate_string!(string)
      return if string.is_a?(String)

      raise TypeError, "no implicit conversion of #{string.class} to String"
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'mini-levenshtein/version'
6
+
7
# Gem specification for mini-levenshtein: identity, links, push restrictions,
# packaged files and the native extension to build on install.
Gem::Specification.new do |spec|
  spec.name = 'mini-levenshtein'
  spec.version = MiniLevenshtein::VERSION
  spec.authors = ['Delon Newman']
  spec.email = 'contact@delonnewman.name'

  spec.summary = 'Simple, fast, levenshtein distance'
  spec.description = spec.summary
  spec.homepage = 'https://github.com/delonnewman/mini-levenshtein'
  spec.license = 'MIT'

  # Without metadata support the push host cannot be restricted, so refuse
  # to build the spec at all.
  unless spec.respond_to?(:metadata)
    raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
  end

  spec.metadata.update(
    'allowed_push_host' => 'https://rubygems.org',
    'homepage_uri' => spec.homepage,
    'source_code_uri' => spec.homepage,
    'changelog_uri' => "#{spec.homepage}#changelog",
    'documentation_uri' => "https://www.rubydoc.info/gems/#{spec.name}"
  )

  # Package every git-tracked file except the test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.extensions = ['ext/mini_levenshtein/extconf.rb']
  spec.require_paths = ['lib']
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mini-levenshtein
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Delon Newman
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-04-29 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Simple, fast, levenshtein distance
14
+ email: contact@delonnewman.name
15
+ executables: []
16
+ extensions:
17
+ - ext/mini_levenshtein/extconf.rb
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".github/workflows/ruby.yml"
21
+ - ".gitignore"
22
+ - ".rspec"
23
+ - ".ruby-version"
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - README.md
27
+ - Rakefile
28
+ - ext/mini_levenshtein/extconf.rb
29
+ - ext/mini_levenshtein/levenshtein.c
30
+ - ext/mini_levenshtein/levenshtein.h
31
+ - ext/mini_levenshtein/mini_levenshtein.c
32
+ - lib/mini-levenshtein.rb
33
+ - lib/mini-levenshtein/version.rb
34
+ - mini-levenshtein.gemspec
35
+ homepage: https://github.com/delonnewman/mini-levenshtein
36
+ licenses:
37
+ - MIT
38
+ metadata:
39
+ allowed_push_host: https://rubygems.org
40
+ homepage_uri: https://github.com/delonnewman/mini-levenshtein
41
+ source_code_uri: https://github.com/delonnewman/mini-levenshtein
42
+ changelog_uri: https://github.com/delonnewman/mini-levenshtein#changelog
43
+ documentation_uri: https://www.rubydoc.info/gems/mini-levenshtein
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubygems_version: 3.2.3
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Simple, fast, levenshtein distance
63
+ test_files: []