mini-levenshtein 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,146 @@
1
+ /*
2
+ * This file has been altered to better fit fuzzywuzzy.
3
+ * To see all changes made, please diff this file with
4
+ * <https://github.com/Tmplt/python-Levenshtein/blob/master/Levenshtein.c>
5
+ *
6
+ * Summary:
7
+ * - stripped all python-related code and data types;
8
+ */
9
+
10
+ /* @(#) $Id: Levenshtein.h,v 1.22 2005/01/13 20:02:56 yeti Exp $ */
11
+ #ifndef LEVENSHTEIN_H
12
+ #define LEVENSHTEIN_H
13
+
14
+ #ifndef size_t
15
+ #include <stdlib.h>
16
+ #endif
17
+
18
+ /* A bit dirty. */
19
+ #ifndef _LEV_STATIC_PY
20
+ #define _LEV_STATIC_PY /* */
21
+ #endif
22
+
23
+ /* In C, this is just wchar_t and unsigned char, in Python, lev_wchar can
24
+ * be anything. If you really want to cheat, define wchar_t to any integer
25
+ * type you like before including Levenshtein.h and recompile it. */
26
+ #ifndef lev_wchar
27
+ #ifndef wchar_t
28
+ #include <wchar.h>
29
+ #endif
30
+ #define lev_wchar wchar_t
31
+ #endif
32
+ typedef char lev_byte;
33
+
34
+ /* Edit operation type.
35
+ * DON'T CHANGE! The values are used as array indices and the bits are
36
+ * occasionally used as flags. */
37
+ typedef enum
38
+ {
39
+ LEV_EDIT_KEEP = 0,
40
+ LEV_EDIT_REPLACE = 1,
41
+ LEV_EDIT_INSERT = 2,
42
+ LEV_EDIT_DELETE = 3,
43
+ LEV_EDIT_LAST /* one past the last real type (value 4); sometimes returned when an error occurs */
44
+ } LevEditType;
45
+
46
+ /* Error codes returned by the edit-op check functions. */
47
+ typedef enum
48
+ {
49
+ LEV_EDIT_ERR_OK = 0, /* no error */
50
+ LEV_EDIT_ERR_TYPE, /* nonexistent edit type */
51
+ LEV_EDIT_ERR_OUT, /* edit out of string bounds */
52
+ LEV_EDIT_ERR_ORDER, /* ops are not ordered */
53
+ LEV_EDIT_ERR_BLOCK, /* inconsistent block boundaries (block ops) */
54
+ LEV_EDIT_ERR_SPAN, /* sequence is not a full transformation (block ops) */
55
+ LEV_EDIT_ERR_LAST /* equals the number of codes above (6) */
56
+ } LevEditOpError;
57
+
58
+ /* String averaging method (not used yet). */
59
+ typedef enum
60
+ {
61
+ LEV_AVG_HEAD = 0, /* take operations from the head */
62
+ LEV_AVG_TAIL, /* take operations from the tail */
63
+ LEV_AVG_SPREAD, /* take an equidistantly distributed subset */
64
+ LEV_AVG_BLOCK, /* take a random continuous block */
65
+ LEV_AVG_RANDOM, /* take a random subset */
66
+ LEV_AVG_LAST /* equals the number of methods above (5) */
67
+ } LevAveragingType;
68
+
69
+ /* Edit operation (atomic).
70
+ * This is the `native' atomic edit operation. It differs from the difflib
71
+ * ones because it represents a change of one character, not a block. We
72
+ * usually don't care about LEV_EDIT_KEEP ops, though the functions can handle
73
+ * them. Positions are interpreted as the left edge of a character.
74
+ */
75
+ typedef struct
76
+ {
77
+ LevEditType type; /* editing operation type */
78
+ size_t spos; /* position in the source string */
79
+ size_t dpos; /* position in the destination string */
80
+ } LevEditOp;
81
+
82
+ /* Edit operation (difflib-compatible).
83
+ * This is not `native', but conversion functions exist. These fields exactly
84
+ * correspond to the fields of the codeops() tuples (and this method is also
85
+ * the source of the silly OpCode name). Sequences must span complete
86
+ * strings; subsequences are simply edit sequences with more (or larger)
87
+ * LEV_EDIT_KEEP blocks.
88
+ */
89
+ typedef struct
90
+ {
91
+ LevEditType type; /* editing operation type */
92
+ size_t sbeg, send; /* source block begin, end (NOTE(review): presumably end-exclusive as in difflib - confirm) */
93
+ size_t dbeg, dend; /* destination block begin, end (same convention as the source pair) */
94
+ } LevOpCode;
95
+
96
+ /* Matching block (difflib-compatible). */
97
+ typedef struct
98
+ {
99
+ size_t spos; /* position of the block in the source string */
100
+ size_t dpos; /* position of the block in the destination string */
101
+ size_t len; /* length of the block (NOTE(review): presumably in string elements - confirm) */
102
+ } LevMatchingBlock;
103
+
104
+ size_t /* edit distance between the two lev_byte strings */
105
+ lev_edit_distance(size_t len1, /* number of elements in string1 */
106
+ const lev_byte *string1, /* first string; its length is passed explicitly in len1 */
107
+ size_t len2, /* number of elements in string2 */
108
+ const lev_byte *string2, /* second string; its length is passed explicitly in len2 */
109
+ int xcost); /* cost switch: the Ruby wrapper passes 0 for distance and 1 for ratio (NOTE(review): presumably nonzero makes a replacement cost 2 - confirm in levenshtein.c) */
110
+
111
+ size_t /* edit distance between the two lev_wchar strings */
112
+ lev_u_edit_distance(size_t len1, /* number of elements in string1 */
113
+ const lev_wchar *string1, /* first string; lev_wchar defaults to wchar_t unless predefined */
114
+ size_t len2, /* number of elements in string2 */
115
+ const lev_wchar *string2, /* second string */
116
+ int xcost); /* cost switch (NOTE(review): presumably nonzero makes a replacement cost 2 - confirm in levenshtein.c) */
117
+
118
+ LevEditOp * /* array of atomic edit ops (NOTE(review): presumably heap-allocated and owned by the caller - confirm) */
119
+ lev_editops_find(size_t len1, /* number of elements in string1 */
120
+ const lev_byte *string1, /* source string */
121
+ size_t len2, /* number of elements in string2 */
122
+ const lev_byte *string2, /* destination string */
123
+ size_t *n); /* out parameter (NOTE(review): presumably receives the number of returned ops - confirm) */
124
+
125
+ LevOpCode * /* block opcodes converted from atomic edit ops (NOTE(review): ownership of the result - confirm) */
126
+ lev_editops_to_opcodes(size_t n, /* NOTE(review): presumably the number of entries in ops - confirm */
127
+ const LevEditOp *ops, /* atomic edit operations to convert; not modified */
128
+ size_t *nb, /* out parameter (NOTE(review): presumably receives the number of opcodes - confirm) */
129
+ size_t len1, /* NOTE(review): presumably the source string length - confirm */
130
+ size_t len2); /* NOTE(review): presumably the destination string length - confirm */
131
+
132
+ LevMatchingBlock * /* matching blocks derived from block opcodes (NOTE(review): ownership of the result - confirm) */
133
+ lev_opcodes_matching_blocks(size_t len1, /* NOTE(review): presumably the source string length - confirm */
134
+                             size_t len2, /* was marked unused; the GCC-only attribute has no effect in a prototype and is dropped for portability */
135
+                             size_t nb, /* NOTE(review): presumably the number of entries in bops - confirm */
136
+                             const LevOpCode *bops, /* block opcodes to scan; not modified */
137
+                             size_t *nmblocks); /* out parameter (NOTE(review): presumably receives the block count - confirm) */
138
+
139
+ LevMatchingBlock * /* matching blocks derived from atomic edit ops (NOTE(review): ownership of the result - confirm) */
140
+ lev_editops_matching_blocks(size_t len1, /* NOTE(review): presumably the source string length - confirm */
141
+ size_t len2, /* NOTE(review): presumably the destination string length - confirm */
142
+ size_t n, /* NOTE(review): presumably the number of entries in ops - confirm */
143
+ const LevEditOp *ops, /* atomic edit operations to scan; not modified */
144
+ size_t *nmblocks); /* out parameter (NOTE(review): presumably receives the block count - confirm) */
145
+
146
+ #endif
@@ -0,0 +1,32 @@
1
+ #include <string.h>
2
+ #include <math.h>
3
+ #include <wchar.h>
4
+ #include <stdlib.h>
5
+
6
+ #include "ruby.h"
7
+ #include "levenshtein.h"
8
+
9
+ VALUE MiniLevenshteinInternal = Qnil;
10
+
11
+ VALUE method_internal_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost);
12
+
13
+ void Init_mini_levenshtein(void) /* extension entry point, run when mini_levenshtein is required */
14
+ {
15
+     MiniLevenshteinInternal = rb_define_module("MiniLevenshteinInternal"); /* top-level module the Ruby side extends */
16
+     rb_define_method(MiniLevenshteinInternal, "internal_distance", method_internal_distance, 3); /* arity 3: s1, s2, xcost */
17
+ }
18
+
19
+ VALUE method_internal_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost)
20
+ {
21
+     /* Ruby method internal_distance(s1, s2, xcost): byte-wise edit distance as an Integer.
22
+      * Type-check first: RSTRING_LEN on a non-String VALUE reads invalid memory. */
23
+     StringValue(s1); /* raises TypeError unless a String or to_str-convertible */
24
+     StringValue(s2);
25
+     int cost = NUM2INT(xcost); /* raises on non-numeric input; FIX2INT decoded any VALUE blindly */
26
+
27
+     /* NOTE(review): confirm how lev_edit_distance reports failure; none is handled here. */
28
+     long distance = lev_edit_distance((size_t)RSTRING_LEN(s1), RSTRING_PTR(s1),
29
+                                       (size_t)RSTRING_LEN(s2), RSTRING_PTR(s2), cost);
30
+
31
+     return LONG2NUM(distance); /* INT2NUM would narrow the long to int */
32
+ }
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MiniLevenshtein
4
+ VERSION = '0.1.0' # gem version; the gemspec reads this constant for its version field
5
+ end
@@ -0,0 +1,33 @@
1
+ require 'mini-levenshtein/mini_levenshtein'
2
+
3
+ module MiniLevenshtein
4
+ extend MiniLevenshteinInternal
5
+
6
+ class << self
7
+ def distance(string1, string2)
8
+ validate_string!(string1)
9
+ validate_string!(string2)
10
+
11
+ internal_distance(string1, string2, 0)
12
+ end
13
+
14
+ def ratio(string1, string2)
15
+ validate_string!(string1)
16
+ validate_string!(string2)
17
+
18
+ lensum = string1.length + string2.length
19
+ return 1.0 if lensum.zero?
20
+
21
+ distance = internal_distance(string1, string2, 1)
22
+ return 1.0 if distance.zero?
23
+
24
+ (lensum - distance) / lensum.to_f
25
+ end
26
+
27
+ private
28
+
29
+ def validate_string!(string)
30
+ raise TypeError, "no implicit conversion of #{string.class} to String" unless string.is_a?(String)
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'mini-levenshtein/version'
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = 'mini-levenshtein'
9
+ s.version = MiniLevenshtein::VERSION
10
+ s.authors = ['Delon Newman']
11
+ s.email = 'contact@delonnewman.name'
12
+
13
+ s.summary = 'Simple, fast, levenshtein distance'
14
+ s.description = s.summary
15
+ s.homepage = 'https://github.com/delonnewman/mini-levenshtein'
16
+ s.license = 'MIT'
17
+
18
+ if s.respond_to?(:metadata)
19
+ s.metadata['allowed_push_host'] = 'https://rubygems.org'
20
+
21
+ s.metadata['homepage_uri'] = s.homepage
22
+ s.metadata['source_code_uri'] = s.homepage
23
+ s.metadata['changelog_uri'] = "#{s.homepage}#changelog"
24
+ s.metadata['documentation_uri'] = "https://www.rubydoc.info/gems/#{s.name}"
25
+ else
26
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
27
+ end
28
+
29
+ s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
30
+ s.extensions = ['ext/mini_levenshtein/extconf.rb']
31
+ s.require_paths = ['lib']
32
+ end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mini-levenshtein
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Delon Newman
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-04-29 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Simple, fast, levenshtein distance
14
+ email: contact@delonnewman.name
15
+ executables: []
16
+ extensions:
17
+ - ext/mini_levenshtein/extconf.rb
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".github/workflows/ruby.yml"
21
+ - ".gitignore"
22
+ - ".rspec"
23
+ - ".ruby-version"
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - README.md
27
+ - Rakefile
28
+ - ext/mini_levenshtein/extconf.rb
29
+ - ext/mini_levenshtein/levenshtein.c
30
+ - ext/mini_levenshtein/levenshtein.h
31
+ - ext/mini_levenshtein/mini_levenshtein.c
32
+ - lib/mini-levenshtein.rb
33
+ - lib/mini-levenshtein/version.rb
34
+ - mini-levenshtein.gemspec
35
+ homepage: https://github.com/delonnewman/mini-levenshtein
36
+ licenses:
37
+ - MIT
38
+ metadata:
39
+ allowed_push_host: https://rubygems.org
40
+ homepage_uri: https://github.com/delonnewman/mini-levenshtein
41
+ source_code_uri: https://github.com/delonnewman/mini-levenshtein
42
+ changelog_uri: https://github.com/delonnewman/mini-levenshtein#changelog
43
+ documentation_uri: https://www.rubydoc.info/gems/mini-levenshtein
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubygems_version: 3.2.3
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: Simple, fast, levenshtein distance
63
+ test_files: []