mitie 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 888f1cb252088e179987e76156daf745d4ea7735c69863b8cca376b325c09f55
4
- data.tar.gz: e282533847927dd119ca935e969745dd77603c5367a32542550b5425f10b98c2
3
+ metadata.gz: cf52799fb9a556f5a0fceda3b5c137991a6b49069171bd66f6f5eef1850ac775
4
+ data.tar.gz: ee25f2614c2322f97b23c46bfc112b102937b806ab977b418d8be9062c3c202d
5
5
  SHA512:
6
- metadata.gz: a2fc5998dd1811288b9be022f8dd181865c711aa3b46feac8f339affb2e4befaaed78aa06ab49bd791bf99f1dff117e3604ddd593c40d8374819babe23a95ae2
7
- data.tar.gz: 167f9eace3b0fe9ef1863598244802a62650c57457922a04793f1ef9a22e1668f3ad12ac9ac7702cd43063d3197b1e55a6e4be96a9bfd3932a7549870d5302a4
6
+ metadata.gz: e2d30ca8a73053b3f00f426011bd1cca4d08c003c56d191ecbab45a950597ca98eb717f6f3d6e5347976b4f747defaa7efab63210bd5e73455feb591f04e2703
7
+ data.tar.gz: 299c393aaab960b9da2fef2975326b02e2163b9d7a54de14648a532a0da80ea200bc026b6383bd57bc0a07b46489a7abc82f4f5e9c419f97c7322a1ec4b12fbd
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.2 (2025-05-04)
2
+
3
+ - Fixed memory leaks
4
+
1
5
  ## 0.3.1 (2024-12-29)
2
6
 
3
7
  - Fixed warning with Ruby 3.4
@@ -4,7 +4,7 @@ module Mitie
4
4
  if path
5
5
  # better error message
6
6
  raise ArgumentError, "File does not exist" unless File.exist?(path)
7
- @pointer = FFI.mitie_load_binary_relation_detector(path)
7
+ @pointer = FFI.mitie_load_binary_relation_detector(+path)
8
8
  @pointer.free = FFI["mitie_free"]
9
9
  elsif pointer
10
10
  @pointer = pointer
@@ -36,7 +36,7 @@ module Mitie
36
36
  end
37
37
 
38
38
  def save_to_disk(filename)
39
- if FFI.mitie_save_binary_relation_detector(filename, pointer) != 0
39
+ if FFI.mitie_save_binary_relation_detector(+filename, pointer) != 0
40
40
  raise Error, "Unable to save detector"
41
41
  end
42
42
  nil
@@ -58,8 +58,9 @@ module Mitie
58
58
  entity2[:token_index],
59
59
  entity2[:token_length]
60
60
  )
61
+ relation.free = FFI["mitie_free"]
61
62
 
62
- score_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
63
+ score_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE, Fiddle::RUBY_FREE)
63
64
  status = FFI.mitie_classify_binary_relation(pointer, relation, score_ptr)
64
65
  raise Error, "Bad status: #{status}" if status != 0
65
66
 
@@ -71,8 +72,6 @@ module Mitie
71
72
  score: score
72
73
  }
73
74
  end
74
- ensure
75
- FFI.mitie_free(relation) if relation
76
75
  end
77
76
  end
78
77
  end
@@ -1,7 +1,7 @@
1
1
  module Mitie
2
2
  class BinaryRelationTrainer
3
3
  def initialize(ner, name: "")
4
- @pointer = FFI.mitie_create_binary_relation_trainer(name, ner.pointer)
4
+ @pointer = FFI.mitie_create_binary_relation_trainer(+name, ner.pointer)
5
5
  @pointer.free = FFI["mitie_free"]
6
6
  end
7
7
 
@@ -22,7 +22,7 @@ module Mitie
22
22
  loop do
23
23
  token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
24
24
  break if token.null?
25
- offset = (offsets_ptr.ptr + i * Fiddle::SIZEOF_LONG).to_s(Fiddle::SIZEOF_LONG).unpack1("L!")
25
+ offset = (offsets_ptr + i * Fiddle::SIZEOF_LONG).to_str(Fiddle::SIZEOF_LONG).unpack1("L!")
26
26
  tokens << [token.to_s.force_encoding(text.encoding), offset]
27
27
  i += 1
28
28
  end
@@ -36,6 +36,7 @@ module Mitie
36
36
  entities = []
37
37
  tokens = tokens_with_offset
38
38
  detections = FFI.mitie_extract_entities(pointer, tokens_ptr)
39
+ detections.free = FFI["mitie_free"]
39
40
  num_detections = FFI.mitie_ner_get_num_detections(detections)
40
41
  num_detections.times do |i|
41
42
  pos = FFI.mitie_ner_get_detection_position(detections, i)
@@ -60,8 +61,6 @@ module Mitie
60
61
  entities << entity
61
62
  end
62
63
  entities
63
- ensure
64
- FFI.mitie_free(detections) if detections
65
64
  end
66
65
  end
67
66
 
@@ -85,20 +84,15 @@ module Mitie
85
84
  tokens_ptr = Utils.array_to_pointer(text)
86
85
  [tokens_ptr, nil]
87
86
  else
88
- offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
89
- tokens_ptr = FFI.mitie_tokenize_with_offsets(text, offsets_ptr)
87
+ offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP, Fiddle::RUBY_FREE)
88
+ tokens_ptr = FFI.mitie_tokenize_with_offsets(+text, offsets_ptr)
90
89
  tokens_ptr.free = FFI["mitie_free"]
91
-
92
- ObjectSpace.define_finalizer(offsets_ptr, self.class.finalize_ptr(offsets_ptr))
90
+ offsets_ptr = offsets_ptr.ptr
91
+ offsets_ptr.free = FFI["mitie_free"]
93
92
 
94
93
  [tokens_ptr, offsets_ptr]
95
94
  end
96
95
  end
97
96
  end
98
-
99
- def self.finalize_ptr(pointer)
100
- # must use proc instead of stabby lambda
101
- proc { FFI.mitie_free(pointer.ptr) }
102
- end
103
97
  end
104
98
  end
data/lib/mitie/ner.rb CHANGED
@@ -6,7 +6,7 @@ module Mitie
6
6
  if path
7
7
  # better error message
8
8
  raise ArgumentError, "File does not exist" unless File.exist?(path)
9
- @pointer = FFI.mitie_load_named_entity_extractor(path)
9
+ @pointer = FFI.mitie_load_named_entity_extractor(+path)
10
10
  @pointer.free = FFI["mitie_free"]
11
11
  elsif pointer
12
12
  @pointer = pointer
@@ -30,7 +30,7 @@ module Mitie
30
30
  end
31
31
 
32
32
  def save_to_disk(filename)
33
- if FFI.mitie_save_named_entity_extractor(filename, pointer) != 0
33
+ if FFI.mitie_save_named_entity_extractor(+filename, pointer) != 0
34
34
  raise Error, "Unable to save model"
35
35
  end
36
36
  nil
@@ -2,7 +2,7 @@ module Mitie
2
2
  class NERTrainer
3
3
  def initialize(filename)
4
4
  raise ArgumentError, "File does not exist" unless File.exist?(filename)
5
- @pointer = FFI.mitie_create_ner_trainer(filename)
5
+ @pointer = FFI.mitie_create_ner_trainer(+filename)
6
6
  @pointer.free = FFI["mitie_free"]
7
7
  end
8
8
 
@@ -15,7 +15,7 @@ module Mitie
15
15
 
16
16
  raise ArgumentError, "Range overlaps existing entity" if overlaps_any_entity?(range)
17
17
 
18
- unless FFI.mitie_add_ner_training_entity(@pointer, range.begin, range.size, label).zero?
18
+ unless FFI.mitie_add_ner_training_entity(@pointer, range.begin, range.size, +label).zero?
19
19
  raise Error, "Unable to add entity to training instance. Probably ran out of RAM."
20
20
  end
21
21
 
@@ -4,7 +4,7 @@ module Mitie
4
4
  if path
5
5
  # better error message
6
6
  raise ArgumentError, "File does not exist" unless File.exist?(path)
7
- @pointer = FFI.mitie_load_text_categorizer(path)
7
+ @pointer = FFI.mitie_load_text_categorizer(+path)
8
8
  @pointer.free = FFI["mitie_free"]
9
9
  elsif pointer
10
10
  @pointer = pointer
@@ -16,24 +16,23 @@ module Mitie
16
16
  def categorize(text)
17
17
  tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
18
18
  tokens_pointer = Utils.array_to_pointer(tokens)
19
- text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
20
- text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
19
+ text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP, Fiddle::RUBY_FREE)
20
+ text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE, Fiddle::RUBY_FREE)
21
21
 
22
22
  if FFI.mitie_categorize_text(@pointer, tokens_pointer, text_tag, text_score) != 0
23
23
  raise Error, "Unable to categorize"
24
24
  end
25
+ text_tag = text_tag.ptr
26
+ text_tag.free = FFI["mitie_free"]
25
27
 
26
28
  {
27
- tag: text_tag.ptr.to_s,
29
+ tag: text_tag.to_s,
28
30
  score: Utils.read_double(text_score)
29
31
  }
30
- ensure
31
- # text_tag must be freed
32
- FFI.mitie_free(text_tag.ptr) if text_tag
33
32
  end
34
33
 
35
34
  def save_to_disk(filename)
36
- if FFI.mitie_save_text_categorizer(filename, @pointer) != 0
35
+ if FFI.mitie_save_text_categorizer(+filename, @pointer) != 0
37
36
  raise Error, "Unable to save model"
38
37
  end
39
38
  nil
@@ -2,14 +2,14 @@ module Mitie
2
2
  class TextCategorizerTrainer
3
3
  def initialize(filename)
4
4
  raise ArgumentError, "File does not exist" unless File.exist?(filename)
5
- @pointer = FFI.mitie_create_text_categorizer_trainer(filename)
5
+ @pointer = FFI.mitie_create_text_categorizer_trainer(+filename)
6
6
  @pointer.free = FFI["mitie_free"]
7
7
  end
8
8
 
9
9
  def add(text, label)
10
10
  tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
11
11
  tokens_pointer = Utils.array_to_pointer(tokens)
12
- FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, label)
12
+ FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, +label)
13
13
  end
14
14
 
15
15
  def beta
data/lib/mitie/utils.rb CHANGED
@@ -2,10 +2,12 @@ module Mitie
2
2
  module Utils
3
3
  def self.array_to_pointer(text)
4
4
  # malloc uses memset to set all bytes to 0
5
- tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1))
5
+ tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1), Fiddle::RUBY_FREE)
6
+ text_ptrs = text.map { |v| Fiddle::Pointer[v] }
6
7
  text.size.times do |i|
7
- tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] = Fiddle::Pointer.to_ptr(text[i]).ref
8
+ tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] = text_ptrs[i].ref
8
9
  end
10
+ tokens_ptr.instance_variable_set(:@mitie_refs, text_ptrs)
9
11
  tokens_ptr
10
12
  end
11
13
 
@@ -16,7 +18,7 @@ module Mitie
16
18
  end
17
19
 
18
20
  def self.read_double(ptr)
19
- ptr.to_s(Fiddle::SIZEOF_DOUBLE).unpack1("d")
21
+ ptr.to_str(Fiddle::SIZEOF_DOUBLE).unpack1("d")
20
22
  end
21
23
  end
22
24
  end
data/lib/mitie/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Mitie
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.2"
3
3
  end
data/lib/mitie.rb CHANGED
@@ -39,20 +39,18 @@ module Mitie
39
39
 
40
40
  class << self
41
41
  def tokenize(text)
42
- tokens_ptr = FFI.mitie_tokenize(text.to_s)
42
+ tokens_ptr = FFI.mitie_tokenize(+text.to_s)
43
+ tokens_ptr.free = FFI["mitie_free"]
43
44
  tokens = read_tokens(tokens_ptr)
44
45
  tokens.each { |t| t.force_encoding(text.encoding) }
45
46
  tokens
46
- ensure
47
- FFI.mitie_free(tokens_ptr) if tokens_ptr
48
47
  end
49
48
 
50
49
  def tokenize_file(filename)
51
50
  raise ArgumentError, "File does not exist" unless File.exist?(filename)
52
- tokens_ptr = FFI.mitie_tokenize_file(filename)
51
+ tokens_ptr = FFI.mitie_tokenize_file(+filename)
52
+ tokens_ptr.free = FFI["mitie_free"]
53
53
  read_tokens(tokens_ptr)
54
- ensure
55
- FFI.mitie_free(tokens_ptr) if tokens_ptr
56
54
  end
57
55
 
58
56
  private
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mitie
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2024-12-29 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: fiddle
@@ -66,7 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  requirements: []
69
- rubygems_version: 3.6.2
69
+ rubygems_version: 3.6.7
70
70
  specification_version: 4
71
71
  summary: Named-entity recognition for Ruby
72
72
  test_files: []