grnexus 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,295 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ffi'
4
+
5
+ module GRNexusTextProcessing
6
+ extend FFI::Library
7
+
8
+ # Detect and load appropriate library
9
+ def self.detect_library
10
+ script_dir = File.dirname(File.expand_path(__FILE__))
11
+ case RbConfig::CONFIG['host_os']
12
+ when /mswin|mingw|cygwin/
13
+ File.join(script_dir, '..', 'exports', 'Windows', 'text_processing.dll')
14
+ when /darwin/
15
+ File.join(script_dir, '..', 'exports', 'Mac', 'text_processing.dylib')
16
+ when /linux/
17
+ File.join(script_dir, '..', 'exports', 'Linux', 'text_processing.so')
18
+ else
19
+ raise "Sistema operativo no soportado: #{RbConfig::CONFIG['host_os']}"
20
+ end
21
+ end
22
+
23
+ ffi_lib detect_library
24
+
25
+ # Define GRNexusData structure
26
+ class GRNexusData < FFI::Struct
27
+ layout :data, :pointer,
28
+ :type, :int,
29
+ :size, :size_t,
30
+ :stride, :size_t,
31
+ :dims, [:size_t, 3]
32
+ end
33
+
34
+ # Define GRNexusVocabulary structure
35
+ class GRNexusVocabulary < FFI::Struct
36
+ layout :tokens, :pointer,
37
+ :indices, :pointer,
38
+ :frequencies, :pointer,
39
+ :idf_scores, :pointer,
40
+ :vocab_size, :size_t,
41
+ :max_token_len, :size_t,
42
+ :total_tokens, :size_t,
43
+ :special_tokens, [:pointer, 10],
44
+ :special_indices, :pointer
45
+ end
46
+
47
+ # Define GRNexusEmbeddings structure
48
+ class GRNexusEmbeddings < FFI::Struct
49
+ layout :embeddings, :pointer,
50
+ :vocab_size, :size_t,
51
+ :embedding_dim, :size_t,
52
+ :norms, :pointer
53
+ end
54
+
55
+ # Define GRNexusTFIDF structure
56
+ class GRNexusTFIDF < FFI::Struct
57
+ layout :tfidf_matrix, :pointer,
58
+ :num_docs, :size_t,
59
+ :vocab_size, :size_t
60
+ end
61
+
62
+ # Function definitions
63
+ attach_function :create_vocabulary_advanced, [:pointer, :size_t, :size_t], :pointer
64
+ attach_function :normalize_vocabulary_advanced, [:string, :pointer, :pointer, :size_t, :string], :int
65
+ attach_function :denormalize_vocabulary, [:pointer, :pointer, :pointer], :int
66
+ attach_function :vectorize_text_tfidf, [:string, :pointer, :pointer, :pointer], :int
67
+ attach_function :create_embeddings, [:pointer, :size_t], :pointer
68
+ attach_function :find_similar_embeddings, [:pointer, :size_t, :pointer, :pointer, :size_t], :int
69
+ attach_function :create_tfidf_model, [:pointer, :pointer, :size_t], :pointer
70
+ attach_function :find_similar_documents, [:pointer, :size_t, :pointer, :pointer, :size_t], :int
71
+ attach_function :free_vocabulary, [:pointer], :void
72
+ attach_function :free_embeddings, [:pointer], :void
73
+ attach_function :free_tfidf, [:pointer], :void
74
+
75
+ # Vocabulary class
76
+ class Vocabulary
77
+ attr_reader :vocab_ptr
78
+
79
+ def initialize(documents, max_vocab_size: 50000)
80
+ # Convert Ruby strings to C strings
81
+ c_docs = FFI::MemoryPointer.new(:pointer, documents.size)
82
+ documents.each_with_index do |doc, i|
83
+ c_docs[i].put_pointer(0, FFI::MemoryPointer.from_string(doc))
84
+ end
85
+
86
+ @vocab_ptr = GRNexusTextProcessing.create_vocabulary_advanced(c_docs, documents.size, max_vocab_size)
87
+ raise 'Failed to create vocabulary' if @vocab_ptr.null?
88
+
89
+ @vocab = GRNexusVocabulary.new(@vocab_ptr)
90
+ ObjectSpace.define_finalizer(self, self.class.finalize(@vocab_ptr))
91
+ end
92
+
93
+ def self.finalize(vocab_ptr)
94
+ proc { GRNexusTextProcessing.free_vocabulary(vocab_ptr) unless vocab_ptr.null? }
95
+ end
96
+
97
+ def size
98
+ @vocab[:vocab_size]
99
+ end
100
+
101
+ def total_tokens
102
+ @vocab[:total_tokens]
103
+ end
104
+
105
+ def get_tokens(max_tokens: 100)
106
+ tokens = []
107
+ tokens_ptr = @vocab[:tokens]
108
+ [max_tokens, size].min.times do |i|
109
+ token_ptr = tokens_ptr.get_pointer(i * FFI::Pointer.size)
110
+ tokens << token_ptr.read_string unless token_ptr.null?
111
+ end
112
+ tokens
113
+ end
114
+
115
+ def normalize_text(text, max_length: 512, strategy: 'pad_right')
116
+ output_data = GRNexusData.new
117
+ output_buffer = FFI::MemoryPointer.new(:double, max_length)
118
+ output_data[:data] = output_buffer
119
+ output_data[:type] = 1
120
+ output_data[:size] = max_length
121
+ output_data[:stride] = 1
122
+ output_data[:dims][0] = max_length
123
+ output_data[:dims][1] = 0
124
+ output_data[:dims][2] = 0
125
+
126
+ result = GRNexusTextProcessing.normalize_vocabulary_advanced(
127
+ text,
128
+ @vocab_ptr,
129
+ output_data.pointer,
130
+ max_length,
131
+ strategy
132
+ )
133
+
134
+ raise "Text normalization failed with code: #{result}" if result != 0
135
+
136
+ output_buffer.read_array_of_double(max_length)
137
+ end
138
+
139
+ def denormalize_indices(indices)
140
+ input_data = GRNexusData.new
141
+ input_buffer = FFI::MemoryPointer.new(:double, indices.size)
142
+ input_buffer.write_array_of_double(indices)
143
+ input_data[:data] = input_buffer
144
+ input_data[:type] = 1
145
+ input_data[:size] = indices.size
146
+ input_data[:stride] = 1
147
+ input_data[:dims][0] = indices.size
148
+ input_data[:dims][1] = 0
149
+ input_data[:dims][2] = 0
150
+
151
+ output_text_ptr = FFI::MemoryPointer.new(:pointer)
152
+
153
+ result = GRNexusTextProcessing.denormalize_vocabulary(
154
+ input_data.pointer,
155
+ @vocab_ptr,
156
+ output_text_ptr
157
+ )
158
+
159
+ raise "Denormalization failed with code: #{result}" if result != 0
160
+
161
+ text = output_text_ptr.read_pointer.read_string
162
+ text
163
+ end
164
+ end
165
+
166
+ # TextVectorizer class
167
+ class TextVectorizer
168
+ def initialize(vocabulary)
169
+ @vocabulary = vocabulary
170
+ end
171
+
172
+ def vectorize(text)
173
+ output_data = GRNexusData.new
174
+ output_buffer = FFI::MemoryPointer.new(:double, @vocabulary.size)
175
+ output_data[:data] = output_buffer
176
+ output_data[:type] = 1
177
+ output_data[:size] = @vocabulary.size
178
+ output_data[:stride] = 1
179
+ output_data[:dims][0] = @vocabulary.size
180
+ output_data[:dims][1] = 0
181
+ output_data[:dims][2] = 0
182
+
183
+ result = GRNexusTextProcessing.vectorize_text_tfidf(
184
+ text,
185
+ @vocabulary.vocab_ptr,
186
+ output_data.pointer,
187
+ nil
188
+ )
189
+
190
+ raise "Vectorization failed with code: #{result}" if result != 0
191
+
192
+ output_buffer.read_array_of_double(@vocabulary.size)
193
+ end
194
+ end
195
+
196
+ # TextEmbeddings class
197
+ class TextEmbeddings
198
+ def initialize(vocabulary, embedding_dim: 100)
199
+ @vocabulary = vocabulary
200
+ @embedding_dim = embedding_dim
201
+ @embeddings_ptr = GRNexusTextProcessing.create_embeddings(vocabulary.vocab_ptr, embedding_dim)
202
+
203
+ raise 'Failed to create embeddings' if @embeddings_ptr.null?
204
+
205
+ ObjectSpace.define_finalizer(self, self.class.finalize(@embeddings_ptr))
206
+ end
207
+
208
+ def self.finalize(embeddings_ptr)
209
+ proc { GRNexusTextProcessing.free_embeddings(embeddings_ptr) unless embeddings_ptr.null? }
210
+ end
211
+
212
+ def find_similar(token_idx, top_k: 10)
213
+ similarities = FFI::MemoryPointer.new(:double, @vocabulary.size)
214
+ similar_indices = FFI::MemoryPointer.new(:size_t, top_k)
215
+
216
+ result = GRNexusTextProcessing.find_similar_embeddings(
217
+ @embeddings_ptr,
218
+ token_idx,
219
+ similarities,
220
+ similar_indices,
221
+ top_k
222
+ )
223
+
224
+ raise "Finding similar embeddings failed with code: #{result}" if result != 0
225
+
226
+ # Read size_t array manually (size_t is typically ulong)
227
+ indices = []
228
+ top_k.times do |i|
229
+ indices << similar_indices.get_ulong(i * FFI.type_size(:size_t))
230
+ end
231
+ sims = indices.map { |i| similarities.get_double(i * 8) }
232
+
233
+ [indices, sims]
234
+ end
235
+ end
236
+
237
+ # TFIDFModel class
238
+ class TFIDFModel
239
+ def initialize(vocabulary, documents)
240
+ @vocabulary = vocabulary
241
+ @num_docs = documents.size
242
+
243
+ # Convert documents to C strings
244
+ c_docs = FFI::MemoryPointer.new(:pointer, documents.size)
245
+ documents.each_with_index do |doc, i|
246
+ c_docs[i].put_pointer(0, FFI::MemoryPointer.from_string(doc))
247
+ end
248
+
249
+ @tfidf_ptr = GRNexusTextProcessing.create_tfidf_model(vocabulary.vocab_ptr, c_docs, documents.size)
250
+
251
+ raise 'Failed to create TF-IDF model' if @tfidf_ptr.null?
252
+
253
+ ObjectSpace.define_finalizer(self, self.class.finalize(@tfidf_ptr))
254
+ end
255
+
256
+ def self.finalize(tfidf_ptr)
257
+ proc { GRNexusTextProcessing.free_tfidf(tfidf_ptr) unless tfidf_ptr.null? }
258
+ end
259
+
260
+ def find_similar_documents(doc_idx, top_k: 5)
261
+ similarities = FFI::MemoryPointer.new(:double, @num_docs)
262
+ similar_docs = FFI::MemoryPointer.new(:size_t, top_k)
263
+
264
+ result = GRNexusTextProcessing.find_similar_documents(
265
+ @tfidf_ptr,
266
+ doc_idx,
267
+ similarities,
268
+ similar_docs,
269
+ top_k
270
+ )
271
+
272
+ raise "Finding similar documents failed with code: #{result}" if result != 0
273
+
274
+ # Read size_t array manually (size_t is typically ulong)
275
+ indices = []
276
+ top_k.times do |i|
277
+ indices << similar_docs.get_ulong(i * FFI.type_size(:size_t))
278
+ end
279
+ sims = indices.map { |i| similarities.get_double(i * 8) }
280
+
281
+ [indices, sims]
282
+ end
283
+ end
284
+
285
+ # Legacy placeholder classes for compatibility
286
+ class TextProcessor
287
+ def process(text)
288
+ raise NotImplementedError, 'Debes implementar el método de procesamiento de texto'
289
+ end
290
+ end
291
+
292
+ class Tokenizer < TextProcessor; end
293
+ class WordEmbedding < TextProcessor; end
294
+ class TextNormalization < TextProcessor; end
295
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: grnexus
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
5
+ platform: ruby
6
+ authors:
7
+ - GR Code Digital Solutions
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: ffi
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.15'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '1.15'
26
+ - !ruby/object:Gem::Dependency
27
+ name: json
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rake
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '13.0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '13.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: minitest
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '5.0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '5.0'
68
+ description: " GRNexus is a revolutionary cross-language AI platform that combines
69
+ the elegance \n of Ruby with the raw power of native C acceleration. Train models
70
+ in Ruby and \n deploy them in Python (or vice versa) with full compatibility.
71
+ Features include \n 35+ activation functions, 12+ layer types, complete NLP pipeline,
72
+ 40+ numeric \n operations, and intelligent training callbacks. 10-100x faster
73
+ than pure Ruby \n implementations thanks to native C core.\n"
74
+ email:
75
+ - gr.code.studio@gmail.com
76
+ executables: []
77
+ extensions: []
78
+ extra_rdoc_files:
79
+ - LICENSE
80
+ - README.md
81
+ files:
82
+ - LICENSE
83
+ - README.md
84
+ - exports/Linux/libgrnexus.so
85
+ - exports/Mac/activations.dylib
86
+ - exports/Mac/grnexus_core.dylib
87
+ - exports/Mac/machine_learning.dylib
88
+ - exports/Mac/normalization.dylib
89
+ - exports/Mac/numeric_proccessing.dylib
90
+ - exports/Mac/text_processing.dylib
91
+ - exports/Windows/activations.dll
92
+ - exports/Windows/grnexus_core.dll
93
+ - exports/Windows/machine_learning.dll
94
+ - exports/Windows/normalization.dll
95
+ - exports/Windows/numeric_proccessing.dll
96
+ - exports/Windows/text_processing.dll
97
+ - lib/grnexus.rb
98
+ - lib/grnexus_activations.rb
99
+ - lib/grnexus_callbacks.rb
100
+ - lib/grnexus_core.rb
101
+ - lib/grnexus_layers.rb
102
+ - lib/grnexus_machine_learning.rb
103
+ - lib/grnexus_normalization.rb
104
+ - lib/grnexus_numeric_proccessing.rb
105
+ - lib/grnexus_text_proccessing.rb
106
+ homepage: https://github.com/grcodedigitalsolutions/GRNexus
107
+ licenses:
108
+ - GPL-3.0-or-later
109
+ metadata:
110
+ homepage_uri: https://github.com/grcodedigitalsolutions/GRNexus
111
+ bug_tracker_uri: https://github.com/grcodedigitalsolutions/GRNexus/issues
112
+ documentation_uri: https://github.com/grcodedigitalsolutions/GRNexus/blob/main/ruby/README.md
113
+ source_code_uri: https://github.com/grcodedigitalsolutions/GRNexus/tree/main/ruby
114
+ changelog_uri: https://github.com/grcodedigitalsolutions/GRNexus/blob/main/CHANGELOG.md
115
+ post_install_message: " \n \n
116
+ \ GRNexus v1.0.2 installed successfully! \n \n
117
+ \ The Ultimate Cross-Language Neural Network Framework \n \n
118
+ \ Features: \n • 35+
119
+ Activation Functions (GELU, Swish, Mish, etc.) \n • 12+ Layer Types (Dense,
120
+ Conv2D, LSTM, GRU, etc.) \n • Complete NLP Pipeline (Tokenization,
121
+ Embeddings, etc.) \n • 40+ Numeric Operations \n
122
+ \ • Cross-Language Model Compatibility (Ruby ↔ Python) \n • Native
123
+ C Acceleration (10-100x faster) \n \n
124
+ \ IMPORTANT: Native Libraries Required \n This gem
125
+ uses FFI to call native C libraries for \n performance. Pre-compiled
126
+ binaries are included for: \n • Linux (.so) \n
127
+ \ • macOS (.dylib) \n • Windows
128
+ (.dll) \n \n
129
+ \ Get started: \n require
130
+ 'grnexus' \n \n
131
+ \ Documentation: https://github.com/grcodedigitalsolutions/GRNexus \n\n \n"
132
+ rdoc_options: []
133
+ require_paths:
134
+ - lib
135
+ required_ruby_version: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ version: 3.0.0
140
+ required_rubygems_version: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ requirements: []
146
+ rubygems_version: 3.7.2
147
+ specification_version: 4
148
+ summary: High-performance cross-language neural network framework
149
+ test_files: []