mitie 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 888f1cb252088e179987e76156daf745d4ea7735c69863b8cca376b325c09f55
4
- data.tar.gz: e282533847927dd119ca935e969745dd77603c5367a32542550b5425f10b98c2
3
+ metadata.gz: da12926ff7be8d2b00da34c945544b556c88a9ba6eeb8292d22feb946a39c1fa
4
+ data.tar.gz: 2f32b90e2b81f6838e43a097a804b0442730b345eccf90bcabcdb181c0a83d1a
5
5
  SHA512:
6
- metadata.gz: a2fc5998dd1811288b9be022f8dd181865c711aa3b46feac8f339affb2e4befaaed78aa06ab49bd791bf99f1dff117e3604ddd593c40d8374819babe23a95ae2
7
- data.tar.gz: 167f9eace3b0fe9ef1863598244802a62650c57457922a04793f1ef9a22e1668f3ad12ac9ac7702cd43063d3197b1e55a6e4be96a9bfd3932a7549870d5302a4
6
+ metadata.gz: fd2506381d76577a692d31f8fd126e2ab77d86b6e6d7cc7d7b8826c79462c642653f195732ee346ec6e00364ce145fc5e36b28ef0f9e7f2d0a564479829f912c
7
+ data.tar.gz: 17b1fc7618990192f1f0cdd28a619caf245c94ebbb0799fa9e32a2b86562484d1e420321464b57b97ff3cefa016e9abfbee556c3fffb5a9cdfd72db89686a468
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.3.3 (2025-11-10)
2
+
3
+ - Added ARM shared library for Linux
4
+
5
+ ## 0.3.2 (2025-05-04)
6
+
7
+ - Fixed memory leaks
8
+
1
9
  ## 0.3.1 (2024-12-29)
2
10
 
3
11
  - Fixed warning with Ruby 3.4
@@ -4,7 +4,7 @@ module Mitie
4
4
  if path
5
5
  # better error message
6
6
  raise ArgumentError, "File does not exist" unless File.exist?(path)
7
- @pointer = FFI.mitie_load_binary_relation_detector(path)
7
+ @pointer = FFI.mitie_load_binary_relation_detector(+path)
8
8
  @pointer.free = FFI["mitie_free"]
9
9
  elsif pointer
10
10
  @pointer = pointer
@@ -36,7 +36,7 @@ module Mitie
36
36
  end
37
37
 
38
38
  def save_to_disk(filename)
39
- if FFI.mitie_save_binary_relation_detector(filename, pointer) != 0
39
+ if FFI.mitie_save_binary_relation_detector(+filename, pointer) != 0
40
40
  raise Error, "Unable to save detector"
41
41
  end
42
42
  nil
@@ -58,8 +58,9 @@ module Mitie
58
58
  entity2[:token_index],
59
59
  entity2[:token_length]
60
60
  )
61
+ relation.free = FFI["mitie_free"]
61
62
 
62
- score_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
63
+ score_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE, Fiddle::RUBY_FREE)
63
64
  status = FFI.mitie_classify_binary_relation(pointer, relation, score_ptr)
64
65
  raise Error, "Bad status: #{status}" if status != 0
65
66
 
@@ -71,8 +72,6 @@ module Mitie
71
72
  score: score
72
73
  }
73
74
  end
74
- ensure
75
- FFI.mitie_free(relation) if relation
76
75
  end
77
76
  end
78
77
  end
@@ -1,7 +1,7 @@
1
1
  module Mitie
2
2
  class BinaryRelationTrainer
3
3
  def initialize(ner, name: "")
4
- @pointer = FFI.mitie_create_binary_relation_trainer(name, ner.pointer)
4
+ @pointer = FFI.mitie_create_binary_relation_trainer(+name, ner.pointer)
5
5
  @pointer.free = FFI["mitie_free"]
6
6
  end
7
7
 
@@ -22,7 +22,7 @@ module Mitie
22
22
  loop do
23
23
  token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
24
24
  break if token.null?
25
- offset = (offsets_ptr.ptr + i * Fiddle::SIZEOF_LONG).to_s(Fiddle::SIZEOF_LONG).unpack1("L!")
25
+ offset = (offsets_ptr + i * Fiddle::SIZEOF_LONG).to_str(Fiddle::SIZEOF_LONG).unpack1("L!")
26
26
  tokens << [token.to_s.force_encoding(text.encoding), offset]
27
27
  i += 1
28
28
  end
@@ -36,6 +36,7 @@ module Mitie
36
36
  entities = []
37
37
  tokens = tokens_with_offset
38
38
  detections = FFI.mitie_extract_entities(pointer, tokens_ptr)
39
+ detections.free = FFI["mitie_free"]
39
40
  num_detections = FFI.mitie_ner_get_num_detections(detections)
40
41
  num_detections.times do |i|
41
42
  pos = FFI.mitie_ner_get_detection_position(detections, i)
@@ -60,8 +61,6 @@ module Mitie
60
61
  entities << entity
61
62
  end
62
63
  entities
63
- ensure
64
- FFI.mitie_free(detections) if detections
65
64
  end
66
65
  end
67
66
 
@@ -85,20 +84,15 @@ module Mitie
85
84
  tokens_ptr = Utils.array_to_pointer(text)
86
85
  [tokens_ptr, nil]
87
86
  else
88
- offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
89
- tokens_ptr = FFI.mitie_tokenize_with_offsets(text, offsets_ptr)
87
+ offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP, Fiddle::RUBY_FREE)
88
+ tokens_ptr = FFI.mitie_tokenize_with_offsets(+text, offsets_ptr)
90
89
  tokens_ptr.free = FFI["mitie_free"]
91
-
92
- ObjectSpace.define_finalizer(offsets_ptr, self.class.finalize_ptr(offsets_ptr))
90
+ offsets_ptr = offsets_ptr.ptr
91
+ offsets_ptr.free = FFI["mitie_free"]
93
92
 
94
93
  [tokens_ptr, offsets_ptr]
95
94
  end
96
95
  end
97
96
  end
98
-
99
- def self.finalize_ptr(pointer)
100
- # must use proc instead of stabby lambda
101
- proc { FFI.mitie_free(pointer.ptr) }
102
- end
103
97
  end
104
98
  end
data/lib/mitie/ner.rb CHANGED
@@ -6,7 +6,7 @@ module Mitie
6
6
  if path
7
7
  # better error message
8
8
  raise ArgumentError, "File does not exist" unless File.exist?(path)
9
- @pointer = FFI.mitie_load_named_entity_extractor(path)
9
+ @pointer = FFI.mitie_load_named_entity_extractor(+path)
10
10
  @pointer.free = FFI["mitie_free"]
11
11
  elsif pointer
12
12
  @pointer = pointer
@@ -30,7 +30,7 @@ module Mitie
30
30
  end
31
31
 
32
32
  def save_to_disk(filename)
33
- if FFI.mitie_save_named_entity_extractor(filename, pointer) != 0
33
+ if FFI.mitie_save_named_entity_extractor(+filename, pointer) != 0
34
34
  raise Error, "Unable to save model"
35
35
  end
36
36
  nil
@@ -2,7 +2,7 @@ module Mitie
2
2
  class NERTrainer
3
3
  def initialize(filename)
4
4
  raise ArgumentError, "File does not exist" unless File.exist?(filename)
5
- @pointer = FFI.mitie_create_ner_trainer(filename)
5
+ @pointer = FFI.mitie_create_ner_trainer(+filename)
6
6
  @pointer.free = FFI["mitie_free"]
7
7
  end
8
8
 
@@ -15,7 +15,7 @@ module Mitie
15
15
 
16
16
  raise ArgumentError, "Range overlaps existing entity" if overlaps_any_entity?(range)
17
17
 
18
- unless FFI.mitie_add_ner_training_entity(@pointer, range.begin, range.size, label).zero?
18
+ unless FFI.mitie_add_ner_training_entity(@pointer, range.begin, range.size, +label).zero?
19
19
  raise Error, "Unable to add entity to training instance. Probably ran out of RAM."
20
20
  end
21
21
 
@@ -4,7 +4,7 @@ module Mitie
4
4
  if path
5
5
  # better error message
6
6
  raise ArgumentError, "File does not exist" unless File.exist?(path)
7
- @pointer = FFI.mitie_load_text_categorizer(path)
7
+ @pointer = FFI.mitie_load_text_categorizer(+path)
8
8
  @pointer.free = FFI["mitie_free"]
9
9
  elsif pointer
10
10
  @pointer = pointer
@@ -16,24 +16,23 @@ module Mitie
16
16
  def categorize(text)
17
17
  tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
18
18
  tokens_pointer = Utils.array_to_pointer(tokens)
19
- text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
20
- text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
19
+ text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP, Fiddle::RUBY_FREE)
20
+ text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE, Fiddle::RUBY_FREE)
21
21
 
22
22
  if FFI.mitie_categorize_text(@pointer, tokens_pointer, text_tag, text_score) != 0
23
23
  raise Error, "Unable to categorize"
24
24
  end
25
+ text_tag = text_tag.ptr
26
+ text_tag.free = FFI["mitie_free"]
25
27
 
26
28
  {
27
- tag: text_tag.ptr.to_s,
29
+ tag: text_tag.to_s,
28
30
  score: Utils.read_double(text_score)
29
31
  }
30
- ensure
31
- # text_tag must be freed
32
- FFI.mitie_free(text_tag.ptr) if text_tag
33
32
  end
34
33
 
35
34
  def save_to_disk(filename)
36
- if FFI.mitie_save_text_categorizer(filename, @pointer) != 0
35
+ if FFI.mitie_save_text_categorizer(+filename, @pointer) != 0
37
36
  raise Error, "Unable to save model"
38
37
  end
39
38
  nil
@@ -2,14 +2,14 @@ module Mitie
2
2
  class TextCategorizerTrainer
3
3
  def initialize(filename)
4
4
  raise ArgumentError, "File does not exist" unless File.exist?(filename)
5
- @pointer = FFI.mitie_create_text_categorizer_trainer(filename)
5
+ @pointer = FFI.mitie_create_text_categorizer_trainer(+filename)
6
6
  @pointer.free = FFI["mitie_free"]
7
7
  end
8
8
 
9
9
  def add(text, label)
10
10
  tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
11
11
  tokens_pointer = Utils.array_to_pointer(tokens)
12
- FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, label)
12
+ FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, +label)
13
13
  end
14
14
 
15
15
  def beta
data/lib/mitie/utils.rb CHANGED
@@ -2,10 +2,12 @@ module Mitie
2
2
  module Utils
3
3
  def self.array_to_pointer(text)
4
4
  # malloc uses memset to set all bytes to 0
5
- tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1))
5
+ tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1), Fiddle::RUBY_FREE)
6
+ text_ptrs = text.map { |v| Fiddle::Pointer[v] }
6
7
  text.size.times do |i|
7
- tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] = Fiddle::Pointer.to_ptr(text[i]).ref
8
+ tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] = text_ptrs[i].ref
8
9
  end
10
+ tokens_ptr.instance_variable_set(:@mitie_refs, text_ptrs)
9
11
  tokens_ptr
10
12
  end
11
13
 
@@ -16,7 +18,7 @@ module Mitie
16
18
  end
17
19
 
18
20
  def self.read_double(ptr)
19
- ptr.to_s(Fiddle::SIZEOF_DOUBLE).unpack1("d")
21
+ ptr.to_str(Fiddle::SIZEOF_DOUBLE).unpack1("d")
20
22
  end
21
23
  end
22
24
  end
data/lib/mitie/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Mitie
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.3"
3
3
  end
data/lib/mitie.rb CHANGED
@@ -29,7 +29,11 @@ module Mitie
29
29
  "libmitie.dylib"
30
30
  end
31
31
  else
32
- "libmitie.so"
32
+ if RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
33
+ "libmitie.arm64.so"
34
+ else
35
+ "libmitie.so"
36
+ end
33
37
  end
34
38
  vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
35
39
  self.ffi_lib = [vendor_lib]
@@ -39,20 +43,18 @@ module Mitie
39
43
 
40
44
  class << self
41
45
  def tokenize(text)
42
- tokens_ptr = FFI.mitie_tokenize(text.to_s)
46
+ tokens_ptr = FFI.mitie_tokenize(+text.to_s)
47
+ tokens_ptr.free = FFI["mitie_free"]
43
48
  tokens = read_tokens(tokens_ptr)
44
49
  tokens.each { |t| t.force_encoding(text.encoding) }
45
50
  tokens
46
- ensure
47
- FFI.mitie_free(tokens_ptr) if tokens_ptr
48
51
  end
49
52
 
50
53
  def tokenize_file(filename)
51
54
  raise ArgumentError, "File does not exist" unless File.exist?(filename)
52
- tokens_ptr = FFI.mitie_tokenize_file(filename)
55
+ tokens_ptr = FFI.mitie_tokenize_file(+filename)
56
+ tokens_ptr.free = FFI["mitie_free"]
53
57
  read_tokens(tokens_ptr)
54
- ensure
55
- FFI.mitie_free(tokens_ptr) if tokens_ptr
56
58
  end
57
59
 
58
60
  private
Binary file
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mitie
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-12-29 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: fiddle
@@ -45,6 +45,7 @@ files:
45
45
  - lib/mitie/version.rb
46
46
  - vendor/LICENSE.txt
47
47
  - vendor/libmitie.arm64.dylib
48
+ - vendor/libmitie.arm64.so
48
49
  - vendor/libmitie.dylib
49
50
  - vendor/libmitie.so
50
51
  - vendor/mitie.dll
@@ -66,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
67
  - !ruby/object:Gem::Version
67
68
  version: '0'
68
69
  requirements: []
69
- rubygems_version: 3.6.2
70
+ rubygems_version: 3.6.9
70
71
  specification_version: 4
71
72
  summary: Named-entity recognition for Ruby
72
73
  test_files: []