chroma-db 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +13 -1
- data/lib/chroma/api_operations/request.rb +125 -0
- data/lib/chroma/chroma.rb +29 -0
- data/lib/chroma/chroma_configuration.rb +75 -0
- data/lib/chroma/errors.rb +71 -0
- data/lib/chroma/resources/collection.rb +384 -0
- data/lib/chroma/resources/database.rb +69 -0
- data/lib/chroma/resources/embedding.rb +23 -0
- data/lib/chroma/util.rb +70 -0
- data/lib/chroma/version.rb +1 -1
- data/lib/chroma-db.rb +18 -0
- data/notebook/Chroma Gem.ipynb +851 -0
- data/notebook/ruby.txt +58 -0
- metadata +13 -3
- data/lib/chroma.rb +0 -6
@@ -0,0 +1,851 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "markdown",
|
5
|
+
"id": "1a0636e0",
|
6
|
+
"metadata": {},
|
7
|
+
"source": [
|
8
|
+
"# Chroma Ruby Gem"
|
9
|
+
]
|
10
|
+
},
|
11
|
+
{
|
12
|
+
"cell_type": "markdown",
|
13
|
+
"id": "3a5b8755",
|
14
|
+
"metadata": {},
|
15
|
+
"source": [
|
16
|
+
"## Database\n",
|
17
|
+
"\n",
|
18
|
+
"Database operations for Chroma"
|
19
|
+
]
|
20
|
+
},
|
21
|
+
{
|
22
|
+
"cell_type": "code",
|
23
|
+
"execution_count": null,
|
24
|
+
"id": "1f0575c2",
|
25
|
+
"metadata": {},
|
26
|
+
"outputs": [],
|
27
|
+
"source": [
|
28
|
+
"require \"logger\"\n",
|
29
|
+
"require \"json\"\n",
|
30
|
+
"require \"securerandom\"\n",
|
31
|
+
"require \"open3\"\n",
|
32
|
+
"\n",
|
33
|
+
"# Requiere Chroma Ruby client.\n",
|
34
|
+
"require \"chroma-db\"\n",
|
35
|
+
"\n",
|
36
|
+
"# Configure Chroma's host. Here you can specify your own host.\n",
|
37
|
+
"Chroma.connect_host = \"http://localhost:8000\"\n",
|
38
|
+
"Chroma.logger = Logger.new($stdout)\n",
|
39
|
+
"Chroma.log_level = Chroma::LEVEL_ERROR\n",
|
40
|
+
"\n",
|
41
|
+
"# Check connection with Database's heartbeat\n",
|
42
|
+
"response = Chroma::Resources::Database.heartbeat\n",
|
43
|
+
"\n",
|
44
|
+
"IRuby.display \"Heartbear timestamp #{response[\"nanosecond heartbeat\"]}\"\n",
|
45
|
+
"\n",
|
46
|
+
"# Check current Chrome server version\n",
|
47
|
+
"version = Chroma::Resources::Database.version\n",
|
48
|
+
"\n",
|
49
|
+
"IRuby.display \"Chrome server version #{version}\"\n",
|
50
|
+
"\n",
|
51
|
+
"# Reset database (DANGER: This deletes all previos data)\n",
|
52
|
+
"Chroma::Resources::Database.reset"
|
53
|
+
]
|
54
|
+
},
|
55
|
+
{
|
56
|
+
"cell_type": "markdown",
|
57
|
+
"id": "2c3c074f",
|
58
|
+
"metadata": {},
|
59
|
+
"source": [
|
60
|
+
"## Collections operations"
|
61
|
+
]
|
62
|
+
},
|
63
|
+
{
|
64
|
+
"cell_type": "markdown",
|
65
|
+
"id": "708a5935",
|
66
|
+
"metadata": {},
|
67
|
+
"source": [
|
68
|
+
"Collection operations for Chroma"
|
69
|
+
]
|
70
|
+
},
|
71
|
+
{
|
72
|
+
"cell_type": "code",
|
73
|
+
"execution_count": 2,
|
74
|
+
"id": "dfc806b5",
|
75
|
+
"metadata": {
|
76
|
+
"scrolled": true
|
77
|
+
},
|
78
|
+
"outputs": [
|
79
|
+
{
|
80
|
+
"data": {
|
81
|
+
"text/plain": [
|
82
|
+
"\"Collections in database 0\""
|
83
|
+
]
|
84
|
+
},
|
85
|
+
"metadata": {},
|
86
|
+
"output_type": "display_data"
|
87
|
+
},
|
88
|
+
{
|
89
|
+
"data": {
|
90
|
+
"text/plain": [
|
91
|
+
"#<Chroma::Resources::Collection:0x000000010551a878 @name=\"ruby-3.0\", @metadata={\"lang\"=>\"ruby\", \"gem\"=>\"chroma-rb\"}>"
|
92
|
+
]
|
93
|
+
},
|
94
|
+
"metadata": {},
|
95
|
+
"output_type": "display_data"
|
96
|
+
},
|
97
|
+
{
|
98
|
+
"data": {
|
99
|
+
"text/plain": [
|
100
|
+
"\"Collections in database 1\""
|
101
|
+
]
|
102
|
+
},
|
103
|
+
"metadata": {},
|
104
|
+
"output_type": "display_data"
|
105
|
+
},
|
106
|
+
{
|
107
|
+
"data": {
|
108
|
+
"text/plain": [
|
109
|
+
"\"Collections in database 0\""
|
110
|
+
]
|
111
|
+
},
|
112
|
+
"metadata": {},
|
113
|
+
"output_type": "display_data"
|
114
|
+
},
|
115
|
+
{
|
116
|
+
"data": {
|
117
|
+
"text/plain": [
|
118
|
+
"#<Chroma::Resources::Collection:0x0000000104d98190 @name=\"ruby-3.0\", @metadata={\"lang\"=>\"ruby\", \"gem\"=>\"chroma-rb\"}>"
|
119
|
+
]
|
120
|
+
},
|
121
|
+
"metadata": {},
|
122
|
+
"output_type": "display_data"
|
123
|
+
},
|
124
|
+
{
|
125
|
+
"data": {
|
126
|
+
"text/plain": [
|
127
|
+
"#<Chroma::Resources::Collection:0x0000000105577078 @name=\"ruby-3.2\", @metadata={\"lang\"=>\"ruby\", \"gem\"=>\"chroma-rb\"}>"
|
128
|
+
]
|
129
|
+
},
|
130
|
+
"metadata": {},
|
131
|
+
"output_type": "display_data"
|
132
|
+
}
|
133
|
+
],
|
134
|
+
"source": [
|
135
|
+
"# Confirm that database has no collections\n",
|
136
|
+
"collections = Chroma::Resources::Collection.list\n",
|
137
|
+
"\n",
|
138
|
+
"collection_name = \"ruby-3.0\"\n",
|
139
|
+
"\n",
|
140
|
+
"IRuby.display \"Collections in database #{collections.size}\"\n",
|
141
|
+
"\n",
|
142
|
+
"# Create a new collection\n",
|
143
|
+
"collection = Chroma::Resources::Collection.create(collection_name, {lang: \"ruby\", gem: \"chroma-rb\"})\n",
|
144
|
+
"\n",
|
145
|
+
"IRuby.display collection\n",
|
146
|
+
"\n",
|
147
|
+
"# Confirm that database has no collections\n",
|
148
|
+
"collections = Chroma::Resources::Collection.list\n",
|
149
|
+
"\n",
|
150
|
+
"IRuby.display \"Collections in database #{collections.size}\"\n",
|
151
|
+
"\n",
|
152
|
+
"# Delete collection\n",
|
153
|
+
"Chroma::Resources::Collection.delete(collection_name)\n",
|
154
|
+
"\n",
|
155
|
+
"# Re-Confirm that database has no collections\n",
|
156
|
+
"collections = Chroma::Resources::Collection.list\n",
|
157
|
+
"\n",
|
158
|
+
"IRuby.display \"Collections in database #{collections.size}\"\n",
|
159
|
+
"\n",
|
160
|
+
"# Create the collection again\n",
|
161
|
+
"Chroma::Resources::Collection.create(collection_name, {lang: \"ruby\", gem: \"chroma-rb\"})\n",
|
162
|
+
"\n",
|
163
|
+
"# Get the collection from database\n",
|
164
|
+
"collection = Chroma::Resources::Collection.get(collection_name)\n",
|
165
|
+
"IRuby.display collection\n",
|
166
|
+
"\n",
|
167
|
+
"# Modify collection name\n",
|
168
|
+
"new_collection_name = \"ruby-3.2\"\n",
|
169
|
+
"collection.modify(new_collection_name)\n",
|
170
|
+
"\n",
|
171
|
+
"# Get modified collection from database\n",
|
172
|
+
"collection = Chroma::Resources::Collection.get(new_collection_name)\n",
|
173
|
+
"IRuby.display collection"
|
174
|
+
]
|
175
|
+
},
|
176
|
+
{
|
177
|
+
"cell_type": "markdown",
|
178
|
+
"id": "36b82a17",
|
179
|
+
"metadata": {},
|
180
|
+
"source": [
|
181
|
+
"### Naive Ruby helper methods"
|
182
|
+
]
|
183
|
+
},
|
184
|
+
{
|
185
|
+
"cell_type": "code",
|
186
|
+
"execution_count": 3,
|
187
|
+
"id": "8708d9c5",
|
188
|
+
"metadata": {},
|
189
|
+
"outputs": [
|
190
|
+
{
|
191
|
+
"data": {
|
192
|
+
"text/plain": [
|
193
|
+
":run_system"
|
194
|
+
]
|
195
|
+
},
|
196
|
+
"execution_count": 3,
|
197
|
+
"metadata": {},
|
198
|
+
"output_type": "execute_result"
|
199
|
+
}
|
200
|
+
],
|
201
|
+
"source": [
|
202
|
+
"class Document\n",
|
203
|
+
" attr_reader :content, :metadata\n",
|
204
|
+
" \n",
|
205
|
+
" def initialize(content, metadata = {})\n",
|
206
|
+
" @content = content\n",
|
207
|
+
" @metadata = metadata\n",
|
208
|
+
" end\n",
|
209
|
+
"end\n",
|
210
|
+
"\n",
|
211
|
+
"class TextLoader\n",
|
212
|
+
" \n",
|
213
|
+
" \n",
|
214
|
+
" def initialize(file)\n",
|
215
|
+
" @file = file\n",
|
216
|
+
" end\n",
|
217
|
+
" \n",
|
218
|
+
" def load\n",
|
219
|
+
" [Document.new(File.read(@file), {source: @file})]\n",
|
220
|
+
" end\n",
|
221
|
+
"end\n",
|
222
|
+
"\n",
|
223
|
+
"class RecursiveWordTextSplitter\n",
|
224
|
+
" def initialize(chunk_size, chunk_overlap)\n",
|
225
|
+
" @chunk_size = chunk_size\n",
|
226
|
+
" @chunk_overlap = chunk_overlap\n",
|
227
|
+
" end\n",
|
228
|
+
"\n",
|
229
|
+
" def split_documents(documents)\n",
|
230
|
+
" original_documents = Array(documents)\n",
|
231
|
+
" \n",
|
232
|
+
" new_documents = []\n",
|
233
|
+
" original_documents.each do |document|\n",
|
234
|
+
" texts = split_text(document.content)\n",
|
235
|
+
" puts texts.size\n",
|
236
|
+
" texts.each do |text|\n",
|
237
|
+
" new_documents << Document.new(text, document.metadata)\n",
|
238
|
+
" end\n",
|
239
|
+
" end\n",
|
240
|
+
" \n",
|
241
|
+
" new_documents\n",
|
242
|
+
" end\n",
|
243
|
+
" \n",
|
244
|
+
" def split_text(text)\n",
|
245
|
+
" split_recursive(text, 0, [])\n",
|
246
|
+
" end\n",
|
247
|
+
"\n",
|
248
|
+
" private\n",
|
249
|
+
"\n",
|
250
|
+
" def split_recursive(text, start_index, chunks)\n",
|
251
|
+
" # Base case: If the remaining word count is less than the chunk size, return the chunks\n",
|
252
|
+
" if start_index + @chunk_size > text.length\n",
|
253
|
+
" chunks << text[start_index..-1]\n",
|
254
|
+
" return chunks\n",
|
255
|
+
" end\n",
|
256
|
+
"\n",
|
257
|
+
" # Calculate the end index of the current chunk\n",
|
258
|
+
" end_index = start_index + @chunk_size\n",
|
259
|
+
"\n",
|
260
|
+
" # Add the current chunk to the array\n",
|
261
|
+
" chunk = text[start_index...end_index]\n",
|
262
|
+
" position = detect_last_whitespace_or_line_return_position(chunk)\n",
|
263
|
+
" chunks << chunk[0..position]&.strip\n",
|
264
|
+
"\n",
|
265
|
+
" # Calculate the next start index with overlap\n",
|
266
|
+
" next_start_index = end_index - @chunk_overlap\n",
|
267
|
+
"\n",
|
268
|
+
" # Recursively split the remaining words\n",
|
269
|
+
" split_recursive(text, next_start_index, chunks)\n",
|
270
|
+
" end\n",
|
271
|
+
" \n",
|
272
|
+
" def detect_last_whitespace_or_line_return_position(string)\n",
|
273
|
+
" position = string.rindex(/\\s|\\n/)\n",
|
274
|
+
" position.nil? ? -1 : position\n",
|
275
|
+
" end\n",
|
276
|
+
"end\n",
|
277
|
+
"\n",
|
278
|
+
"def run_system(command)\n",
|
279
|
+
" stdin, stdout, stderr, wait_thr = Open3.popen3(command)\n",
|
280
|
+
" stdout_data = stdout.gets(nil)\n",
|
281
|
+
" stdout.close\n",
|
282
|
+
" stderr_data = stderr.gets(nil)\n",
|
283
|
+
" stderr.close\n",
|
284
|
+
" exit_code = wait_thr.value\n",
|
285
|
+
" \n",
|
286
|
+
" [stdout_data, stderr_data, exit_code]\n",
|
287
|
+
"end"
|
288
|
+
]
|
289
|
+
},
|
290
|
+
{
|
291
|
+
"cell_type": "markdown",
|
292
|
+
"id": "01f60953",
|
293
|
+
"metadata": {},
|
294
|
+
"source": [
|
295
|
+
"## Transform texts"
|
296
|
+
]
|
297
|
+
},
|
298
|
+
{
|
299
|
+
"cell_type": "markdown",
|
300
|
+
"id": "733db3fb",
|
301
|
+
"metadata": {},
|
302
|
+
"source": [
|
303
|
+
"Using the naive Ruby classes transforms documents into chunks"
|
304
|
+
]
|
305
|
+
},
|
306
|
+
{
|
307
|
+
"cell_type": "code",
|
308
|
+
"execution_count": 4,
|
309
|
+
"id": "f6fcee33",
|
310
|
+
"metadata": {},
|
311
|
+
"outputs": [
|
312
|
+
{
|
313
|
+
"data": {
|
314
|
+
"text/plain": [
|
315
|
+
"1"
|
316
|
+
]
|
317
|
+
},
|
318
|
+
"execution_count": 4,
|
319
|
+
"metadata": {},
|
320
|
+
"output_type": "execute_result"
|
321
|
+
}
|
322
|
+
],
|
323
|
+
"source": [
|
324
|
+
"documents = TextLoader.new(\"ruby.txt\").load\n",
|
325
|
+
"documents.size"
|
326
|
+
]
|
327
|
+
},
|
328
|
+
{
|
329
|
+
"cell_type": "code",
|
330
|
+
"execution_count": 5,
|
331
|
+
"id": "f00e5431",
|
332
|
+
"metadata": {},
|
333
|
+
"outputs": [
|
334
|
+
{
|
335
|
+
"name": "stdout",
|
336
|
+
"output_type": "stream",
|
337
|
+
"text": [
|
338
|
+
"2\n"
|
339
|
+
]
|
340
|
+
},
|
341
|
+
{
|
342
|
+
"data": {
|
343
|
+
"text/plain": [
|
344
|
+
"2"
|
345
|
+
]
|
346
|
+
},
|
347
|
+
"execution_count": 5,
|
348
|
+
"metadata": {},
|
349
|
+
"output_type": "execute_result"
|
350
|
+
}
|
351
|
+
],
|
352
|
+
"source": [
|
353
|
+
"text_splitter = RecursiveWordTextSplitter.new(chunk_size=1000, chunk_overlap=200)\n",
|
354
|
+
"texts = text_splitter.split_documents(documents)\n",
|
355
|
+
"texts.size"
|
356
|
+
]
|
357
|
+
},
|
358
|
+
{
|
359
|
+
"cell_type": "code",
|
360
|
+
"execution_count": 6,
|
361
|
+
"id": "47ec27a0",
|
362
|
+
"metadata": {},
|
363
|
+
"outputs": [
|
364
|
+
{
|
365
|
+
"data": {
|
366
|
+
"text/plain": [
|
367
|
+
"#<#<Class:0x0000000104f880e0>::Document:0x00000001055db370 @content=\"Array#any?\\n\\nany? → true or false\\nany? {|element| ... } → true or false\\nany?(obj) → true or false\\n\\nReturns true if any element of self meets a given criterion.\\n\\nWith no block given and no argument, returns true if self has any truthy element, false otherwise:\\n\\n[nil, 0, false].any? # => true\\n[nil, false].any? # => false\\n[].any? # => false\\nWith a block given and no argument, calls the block with each element in self; returns true if the block returns any truthy value, false otherwise:\\n\\n[0, 1, 2].any? {|element| element > 1 } # => true\\n[0, 1, 2].any? {|element| element > 2 } # => false\\nIf argument obj is given, returns true if obj.=== any element, false otherwise:\\n\\n['food', 'drink'].any?(/foo/) # => true\\n['food', 'drink'].any?(/bar/) # => false\\n[].any?(/foo/) # => false\\n[0, 1, 2].any?(1) # => true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns\", @metadata={:source=>\"ruby.txt\"}>"
|
368
|
+
]
|
369
|
+
},
|
370
|
+
"execution_count": 6,
|
371
|
+
"metadata": {},
|
372
|
+
"output_type": "execute_result"
|
373
|
+
}
|
374
|
+
],
|
375
|
+
"source": [
|
376
|
+
"texts[0]"
|
377
|
+
]
|
378
|
+
},
|
379
|
+
{
|
380
|
+
"cell_type": "markdown",
|
381
|
+
"id": "77a5327e",
|
382
|
+
"metadata": {},
|
383
|
+
"source": [
|
384
|
+
"**Prepare content**"
|
385
|
+
]
|
386
|
+
},
|
387
|
+
{
|
388
|
+
"cell_type": "code",
|
389
|
+
"execution_count": 7,
|
390
|
+
"id": "44354d3b",
|
391
|
+
"metadata": {},
|
392
|
+
"outputs": [
|
393
|
+
{
|
394
|
+
"data": {
|
395
|
+
"text/plain": [
|
396
|
+
"[\"Array#any?\\n\\nany? → true or false\\nany? {|element| ... } → true or false\\nany?(obj) → true or false\\n\\nReturns true if any element of self meets a given criterion.\\n\\nWith no block given and no argument, returns true if self has any truthy element, false otherwise:\\n\\n[nil, 0, false].any? # => true\\n[nil, false].any? # => false\\n[].any? # => false\\nWith a block given and no argument, calls the block with each element in self; returns true if the block returns any truthy value, false otherwise:\\n\\n[0, 1, 2].any? {|element| element > 1 } # => true\\n[0, 1, 2].any? {|element| element > 2 } # => false\\nIf argument obj is given, returns true if obj.=== any element, false otherwise:\\n\\n['food', 'drink'].any?(/foo/) # => true\\n['food', 'drink'].any?(/bar/) # => false\\n[].any?(/foo/) # => false\\n[0, 1, 2].any?(1) # => true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns\", \"true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns a new Array whose elements are the return values from the block:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map {|element| element.class }\\na1 # => [Symbol, String, Integer]\\nReturns a new Enumerator if no block given:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map\\na1 # => #<Enumerator: [:foo, \\\"bar\\\", 2]:map>\\nArray#collect is an alias for Array#map.\\n\\nAlias for: collect\\nmap! {|element| ... } → self\\nmap! → new_enumerator\\nCalls the block, if given, with each element; replaces the element with the block’s return value:\\n\\na = [:foo, 'bar', 2]\\na.map! { |element| element.class } # => [Symbol, String, Integer]\\nReturns a new Enumerator if no block given:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map!\\na1 # => #<Enumerator: [:foo, \\\"bar\\\", 2]:map!>\\nArray#collect! is an alias for Array#map!.\\n\\nAlias for: collect!\\n\"]"
|
397
|
+
]
|
398
|
+
},
|
399
|
+
"metadata": {},
|
400
|
+
"output_type": "display_data"
|
401
|
+
}
|
402
|
+
],
|
403
|
+
"source": [
|
404
|
+
"contents = texts.map(&:content)\n",
|
405
|
+
"IRuby.display contents"
|
406
|
+
]
|
407
|
+
},
|
408
|
+
{
|
409
|
+
"cell_type": "code",
|
410
|
+
"execution_count": 8,
|
411
|
+
"id": "b5307484",
|
412
|
+
"metadata": {},
|
413
|
+
"outputs": [
|
414
|
+
{
|
415
|
+
"data": {
|
416
|
+
"text/plain": [
|
417
|
+
"[{:source=>\"ruby.txt\"}, {:source=>\"ruby.txt\"}]"
|
418
|
+
]
|
419
|
+
},
|
420
|
+
"metadata": {},
|
421
|
+
"output_type": "display_data"
|
422
|
+
}
|
423
|
+
],
|
424
|
+
"source": [
|
425
|
+
"metadatas = texts.map(&:metadata)\n",
|
426
|
+
"IRuby.display metadatas"
|
427
|
+
]
|
428
|
+
},
|
429
|
+
{
|
430
|
+
"cell_type": "code",
|
431
|
+
"execution_count": 9,
|
432
|
+
"id": "36c1b558",
|
433
|
+
"metadata": {},
|
434
|
+
"outputs": [
|
435
|
+
{
|
436
|
+
"data": {
|
437
|
+
"text/plain": [
|
438
|
+
"2060"
|
439
|
+
]
|
440
|
+
},
|
441
|
+
"execution_count": 9,
|
442
|
+
"metadata": {},
|
443
|
+
"output_type": "execute_result"
|
444
|
+
}
|
445
|
+
],
|
446
|
+
"source": [
|
447
|
+
"File.open(\"documents.json\",\"w\") do |f|\n",
|
448
|
+
" f.write(contents.to_json)\n",
|
449
|
+
"end"
|
450
|
+
]
|
451
|
+
},
|
452
|
+
{
|
453
|
+
"cell_type": "markdown",
|
454
|
+
"id": "9534bb26",
|
455
|
+
"metadata": {},
|
456
|
+
"source": [
|
457
|
+
"## Python glue"
|
458
|
+
]
|
459
|
+
},
|
460
|
+
{
|
461
|
+
"cell_type": "markdown",
|
462
|
+
"id": "31eb215a",
|
463
|
+
"metadata": {},
|
464
|
+
"source": [
|
465
|
+
"We need Python glue to create text embeddings.\n",
|
466
|
+
"\n",
|
467
|
+
"I first try to use Ruby's gem **Pycall** to use HuggingFace's embeddings but I couldn't make it to work. The commented code shows what I was trying to do here and also shows the error.\n",
|
468
|
+
"\n",
|
469
|
+
"I opted to run Python code as system command and capture the output to bring it back to Ruby.\n",
|
470
|
+
"\n",
|
471
|
+
"Before you need the following Python libraries installed in your system."
|
472
|
+
]
|
473
|
+
},
|
474
|
+
{
|
475
|
+
"cell_type": "code",
|
476
|
+
"execution_count": 10,
|
477
|
+
"id": "02f9945a",
|
478
|
+
"metadata": {},
|
479
|
+
"outputs": [
|
480
|
+
{
|
481
|
+
"data": {
|
482
|
+
"text/plain": [
|
483
|
+
"#<Process::Status: pid 3263 exit 0>"
|
484
|
+
]
|
485
|
+
},
|
486
|
+
"metadata": {},
|
487
|
+
"output_type": "display_data"
|
488
|
+
}
|
489
|
+
],
|
490
|
+
"source": [
|
491
|
+
"stdout_data, stderr_data, exit_code = run_system(\"pip -q install langchain sentence_transformers InstructorEmbedding\")\n",
|
492
|
+
"\n",
|
493
|
+
"IRuby.display stdout_data\n",
|
494
|
+
"IRuby.display stderr_data\n",
|
495
|
+
"IRuby.display exit_code"
|
496
|
+
]
|
497
|
+
},
|
498
|
+
{
|
499
|
+
"cell_type": "markdown",
|
500
|
+
"id": "eac28970",
|
501
|
+
"metadata": {},
|
502
|
+
"source": [
|
503
|
+
"**WARNING**: The following code might take a long time to run the first time, since it downloads and install HuggingFace models before creating embeddings for our texts."
|
504
|
+
]
|
505
|
+
},
|
506
|
+
{
|
507
|
+
"cell_type": "code",
|
508
|
+
"execution_count": 14,
|
509
|
+
"id": "b8e3026f",
|
510
|
+
"metadata": {},
|
511
|
+
"outputs": [
|
512
|
+
{
|
513
|
+
"data": {
|
514
|
+
"text/plain": [
|
515
|
+
"\"load INSTRUCTOR_Transformer\\nmax_seq_length 512\\nEmbeddings at embeddings.json\\n\""
|
516
|
+
]
|
517
|
+
},
|
518
|
+
"metadata": {},
|
519
|
+
"output_type": "display_data"
|
520
|
+
},
|
521
|
+
{
|
522
|
+
"data": {
|
523
|
+
"text/plain": [
|
524
|
+
"#<Process::Status: pid 3304 exit 0>"
|
525
|
+
]
|
526
|
+
},
|
527
|
+
"metadata": {},
|
528
|
+
"output_type": "display_data"
|
529
|
+
}
|
530
|
+
],
|
531
|
+
"source": [
|
532
|
+
"# require \"pycall\"\n",
|
533
|
+
"# require \"pycall/import\"\n",
|
534
|
+
"# include PyCall::Import\n",
|
535
|
+
"\n",
|
536
|
+
"# pyimport \"InstructorEmbedding\" import INSTRUCTOR\n",
|
537
|
+
"# pyimport \"langchain.embeddings\", as: \"embeddings\"\n",
|
538
|
+
"\n",
|
539
|
+
"# from InstructorEmbedding import INSTRUCTOR\n",
|
540
|
+
"# from langchain.embeddings import HuggingFaceInstructEmbeddings\n",
|
541
|
+
"\n",
|
542
|
+
"# instructor_embeddings = embeddings.HuggingFaceInstructEmbeddings.new(\n",
|
543
|
+
"# model_name: \"hkunlp/instructor-xl\",\n",
|
544
|
+
"# model_kwargs: { \"device\" => \"cpu\" } # Use cuda as value if you have a GPU.\n",
|
545
|
+
"# )\n",
|
546
|
+
"#\n",
|
547
|
+
"# PyCall::LibPythonFunctionNotFound: Unable to find the required symbol in libpython: _Py_NoneStruct\n",
|
548
|
+
"\n",
|
549
|
+
"command = <<~PYTHON\n",
|
550
|
+
"python - << EOF\n",
|
551
|
+
"import json\n",
|
552
|
+
"from InstructorEmbedding import INSTRUCTOR\n",
|
553
|
+
"from langchain.embeddings import HuggingFaceInstructEmbeddings\n",
|
554
|
+
"\n",
|
555
|
+
"from langchain.embeddings import HuggingFaceInstructEmbeddings\n",
|
556
|
+
"instructor_embeddings = HuggingFaceInstructEmbeddings(model_name=\"hkunlp/instructor-xl\", \n",
|
557
|
+
" model_kwargs={\"device\": \"cpu\"})\n",
|
558
|
+
"\n",
|
559
|
+
"with open(\"documents.json\") as f:\n",
|
560
|
+
" file_content = f.read()\n",
|
561
|
+
"\n",
|
562
|
+
"documents = json.loads(file_content)\n",
|
563
|
+
" \n",
|
564
|
+
"embeddings = instructor_embeddings.embed_documents(list(documents))\n",
|
565
|
+
"\n",
|
566
|
+
"with open(\"embeddings.json\", \"w\", encoding=\"utf-8\") as file:\n",
|
567
|
+
" json.dump(embeddings, file, ensure_ascii=False, indent=4)\n",
|
568
|
+
" \n",
|
569
|
+
"print(\"Embeddings at embeddings.json\")\n",
|
570
|
+
"EOF\n",
|
571
|
+
"PYTHON\n",
|
572
|
+
"\n",
|
573
|
+
"stdout_data, stderr_data, exit_code = run_system(command)\n",
|
574
|
+
"\n",
|
575
|
+
"IRuby.display stdout_data\n",
|
576
|
+
"IRuby.display stderr_data\n",
|
577
|
+
"IRuby.display exit_code\n"
|
578
|
+
]
|
579
|
+
},
|
580
|
+
{
|
581
|
+
"cell_type": "markdown",
|
582
|
+
"id": "c40edc3a",
|
583
|
+
"metadata": {},
|
584
|
+
"source": [
|
585
|
+
"## Embeddings in collection"
|
586
|
+
]
|
587
|
+
},
|
588
|
+
{
|
589
|
+
"cell_type": "markdown",
|
590
|
+
"id": "a8d84181",
|
591
|
+
"metadata": {},
|
592
|
+
"source": [
|
593
|
+
"Embedding operations for Chroma"
|
594
|
+
]
|
595
|
+
},
|
596
|
+
{
|
597
|
+
"cell_type": "code",
|
598
|
+
"execution_count": 15,
|
599
|
+
"id": "6234ad5d",
|
600
|
+
"metadata": {},
|
601
|
+
"outputs": [
|
602
|
+
{
|
603
|
+
"data": {
|
604
|
+
"text/plain": [
|
605
|
+
"[[0.03228979930281639, 0.04374200478196144, 0.014979278668761253, -0.008304460905492306, -0.0036412617191672325, -0.04301042854785919, -0.08898357301950455, 0.010292756371200085, -0.04601152241230011, -0.01627073809504509, 0.0423235185444355, 0.035671770572662354, -0.03516373038291931, -0.10667434334754944, -0.0305192731320858, -0.00317957391962409, 0.00898097362369299, -0.04044022411108017, -0.03859616443514824, -2.847463656507898e-05, -0.02551359124481678, 0.015683457255363464, -0.03809453547000885, 0.026084054261446, 0.004630913957953453, -0.05073123052716255, -0.005383108276873827, 0.00900851096957922, -0.022124305367469788, 0.0008553847437724471, 0.03406482934951782, 0.006923138629645109, 0.016204921528697014, -0.0248075183480978, 0.020003201439976692, 0.015132367610931396, -0.01962454617023468, -0.03499720245599747, 0.01819889433681965, -0.0026789242401719093, -0.06468355655670166, 0.023844322189688683, 0.012745407409965992, -0.03798774629831314, 0.03615935146808624, 0.07037360221147537, -0.01144226361066103, -0.008408664725720882, 0.01989738829433918, -0.03551855683326721, -0.009068832732737064, -0.011420144699513912, -0.053683359175920486, -0.004001090303063393, -0.05866784229874611, -0.028031479567289352, -0.04380376264452934, 0.0010980378137901425, -0.014896156266331673, 0.07904498279094696, -0.002795759355649352, 0.042044781148433685, 0.007605243939906359, 0.029185891151428223, -0.0007026402745395899, 0.027147360146045685, 0.03978419303894043, 0.02210610918700695, -0.012754594907164574, 0.004411553964018822, -0.045703720301389694, 0.03973427787423134, -0.04186716303229332, -0.03332607448101044, -0.0015854788944125175, 0.09185648709535599, 0.03097948245704174, 0.008720682002604008, 0.011866453103721142, -0.025312529876828194, -0.04079953953623772, 0.004299015738070011, -0.07025784999132156, 0.01059478335082531, -0.005177331157028675, 0.01095830462872982, -0.014626510441303253, 0.03253486379981041, 0.018911102786660194, 0.058727703988552094, -0.02561543881893158, -0.051282405853271484, -0.001387969241477549, -0.007063963450491428, -0.017050521448254585, 0.03534478694200516, 0.03895848989486694, -0.002462860895320773, 0.013893542811274529, -0.08051256835460663, 0.027548780664801598, 0.03188430890440941, -0.028614526614546776, -0.08799060434103012, -0.0124077582731843, -0.04059695079922676, 0.038384631276130676, 0.0369478240609169, 0.04113375023007393, -0.007888813503086567, 0.022122997790575027, 0.021126342937350273, 0.020651601254940033, -0.026094896718859673, 0.04217687249183655, -0.055210452526807785, 0.05842498317360878, 0.08628174662590027, -0.006704084575176239, -0.008585629053413868, 0.029757793992757797, 0.03337784856557846, 0.02287106402218342, 0.041264794766902924, 0.008918775245547295, 0.01518432330340147, 0.04925611615180969, -0.02022290974855423, 0.000812036101706326, -0.019539330154657364, -0.0007718283450230956, 0.006669742986559868, 0.0041087307035923, -0.0065430463291704655, 0.06524598598480225, -0.01344641949981451, 0.04301084205508232, -0.02710653841495514, 0.0224214568734169, -0.09976571053266525, 0.021154599264264107, -0.05801623687148094, 0.07450253516435623, -0.011398211121559143, -0.0265400018543005, 0.0035808708053082228, -0.021095940843224525, 0.03220225125551224, 0.010811158455908298, 0.02507082000374794, -0.004476296249777079, -0.021498845890164375, 0.004178662318736315, -0.012609281577169895, -0.04164125397801399, -0.016675475984811783, -0.05637894198298454, -0.00039760651998221874, 0.013929449021816254, 0.015408114530146122, 0.043402332812547684, -0.04887295141816139, -0.029663026332855225, 0.07050147652626038, -0.07254905253648758, 0.011923249810934067, 0.03870907053351402, -0.02336050756275654, 0.012871748767793179, 0.02158389799296856, 0.020876718685030937, -0.007757823448628187, -0.0452481284737587, -0.03634555637836456, 0.0010483156656846404, -0.023808754980564117, 0.01596495322883129, 0.0714854747056961, 0.0018134007696062326, 0.010900470428168774, 0.03798305243253708, 0.019611285999417305, 0.044536292552948, 0.04413134977221489, -0.0038643558509647846, 0.016711169853806496, -0.04104889929294586, -0.051808178424835205, -0.005617272108793259, 0.017367791384458542, 0.02081133984029293, -0.1284034550189972, 0.04220516234636307, 0.05191464722156525, 0.04524064436554909, 0.009792013093829155, -0.008229931816458702, 0.07202937453985214, -0.016330188140273094, 0.016698677092790604, 0.012947555631399155, 0.0011996577959507704, 0.0176108218729496, 0.01835097186267376, -0.002980436198413372, 0.03625079616904259, 0.007594248279929161, 0.016943421214818954, 0.00224352371878922, 0.004125209059566259, 0.005772887729108334, 0.020054487511515617, -0.0021010898053646088, 0.023027975112199783, -0.028509322553873062, 0.12751130759716034, 0.021971622481942177, -0.003281302284449339, -0.018533241003751755, 0.0691463053226471, -0.007271943148225546, 0.02029179222881794, 0.010470682755112648, 0.005038999952375889, 0.062135156244039536, -0.04069601371884346, -0.03258826583623886, -0.02782304212450981, 0.08235930651426315, 0.016592130064964294, 0.00679009361192584, 0.01324023399502039, 0.06185121089220047, 0.04765875265002251, -0.011462446302175522, -0.041887979954481125, -0.030739666894078255, -0.06439483910799026, -0.03637808561325073, -0.03531050309538841, 0.0031687167938798666, -0.010794159024953842, -0.033315885812044144, -0.008723964914679527, 0.032397761940956116, 0.0710805356502533, -0.022815288975834846, 0.03034850023686886, -0.0049735307693481445, -0.018393810838460922, -0.03242115303874016, -0.06379392743110657, 0.032948728650808334, 0.025140387937426567, 0.00576405692845583, 0.014179369434714317, -0.062080904841423035, -0.06615299731492996, -0.003071098355576396, 0.01967025175690651, -0.03492140397429466, -0.007880707271397114, -0.057441577315330505, 0.002134165959432721, -0.025045592337846756, -0.008549192920327187, 0.048091255128383636, -0.014099105261266232, 0.04910006374120712, 0.010198627598583698, -0.029759423807263374, 0.04051714017987251, 0.06776628643274307, -0.034147296100854874, -0.026143042370676994, -0.006945130880922079, -0.008138955570757389, -0.05489006265997887, -0.04387400299310684, 0.046449482440948486, -0.019213583320379257, -0.022300662472844124, -0.008200730197131634, -0.03233560174703598, 0.003402227535843849, 0.011234400793910027, -0.002742406213656068, -0.02069772034883499, 0.038628507405519485, -0.04607287794351578, 0.016672592610120773, 0.04447432607412338, -0.033386338502168655, -0.007463876157999039, -0.052367787808179855, -0.013385064899921417, -0.022224988788366318, 0.04003913700580597, 0.013125717639923096, -0.0002900198451243341, -0.056771036237478256, -0.021361535415053368, -0.032924383878707886, -0.036289069801568985, -0.01641670987010002, -0.036186669021844864, -0.03189323469996452, -0.006797273643314838, 0.004788490012288094, 0.0068277702666819096, 0.02284691296517849, 0.059777747839689255, 0.04041121155023575, 0.019820593297481537, -0.050160445272922516, -0.04713621735572815, 0.04364219307899475, 0.02694518491625786, -0.015432613901793957, -0.029616745188832283, 0.07202371954917908, -0.011115726083517075, 0.015347307547926903, 0.014280633069574833, -0.002249254612252116, 0.019911013543605804, 0.00929440651088953, 0.020636478438973427, 0.028549201786518097, -0.10426840931177139, 0.028348496183753014, 0.04307335987687111, 0.006135597825050354, 0.011010325513780117, 0.03592953458428383, 0.05262245237827301, -0.004868381656706333, 0.01509394496679306, 0.038539350032806396, 0.0036197768058627844, -0.03548605740070343, -0.013938318938016891, -0.07516362518072128, 0.019009025767445564, -0.07927123457193375, 0.030785150825977325, 0.01724003255367279, 0.10079476982355118, -0.05789526179432869, -0.02340284176170826, 0.009072965942323208, -0.008314074948430061, 0.049702033400535583, 0.024742692708969116, 0.015528418123722076, 0.00030811724718660116, 0.027656322345137596, 0.023148810490965843, -0.006079103332012892, 0.02465176209807396, 0.06162107363343239, 0.06096961349248886, -0.00870341807603836, -0.056443870067596436, 0.026660898700356483, -0.004470273852348328, -0.04705857113003731, -0.10955272614955902, 0.03670618310570717, 0.04034575819969177, 0.0022876777220517397, 0.022575348615646362, 0.026446910575032234, 0.039642687886953354, -0.04091582074761391, 0.06343641877174377, -0.0324559323489666, -0.059108342975378036, -0.032128505408763885, -0.03802308812737465, 0.03290381282567978, -0.0035157541278749704, 0.04257664084434509, 0.030607890337705612, 0.03177213296294212, 0.006457501091063023, 0.043441928923130035, -0.009309722110629082, 0.01678484119474888, -0.007282345090061426, 0.006767678540199995, 0.047442350536584854, -0.006221396382898092, 0.03569779172539711, 0.01492008101195097, -0.03090090863406658, -0.05462432652711868, -0.03663143143057823, -0.039276741445064545, 0.003162890672683716, -0.006974783726036549, -0.01605837605893612, -0.007104011252522469, -0.00497934827581048, 0.01746319606900215, 0.009480826556682587, -0.021224495023489, 0.07256880402565002, 0.011976632289588451, -0.005190932657569647, 0.0137846814468503, -0.04294455423951149, -0.004575804807245731, -0.029106736183166504, -0.00396405765786767, 0.0162188820540905, -0.05301636457443237, -0.02794911526143551, 0.01211538352072239, 0.011636774055659771, 0.001118947984650731, -0.030082430690526962, 0.03802220895886421, 0.0018671602010726929, -0.04864092543721199, 0.010714356787502766, 0.019209057092666626, 0.01017305813729763, -0.023894254118204117, 0.06532895565032959, 0.0342823825776577, 0.0030650184489786625, 0.012038799934089184, 0.036395054310560226, -0.053046561777591705, 0.11037362366914749, -0.039846278727054596, 0.03756939247250557, -0.02227490022778511, 0.07383814454078674, 0.025581585243344307, -0.019945910200476646, 0.0238353181630373, 0.05056775361299515, 0.03472975641489029, -0.01490088738501072, 0.008003530092537403, -0.0071889315731823444, 0.06521737575531006, 0.03012000396847725, 0.029991764575242996, 0.04536479339003563, -0.041474953293800354, 0.040479108691215515, 0.013666344806551933, -0.03651740401983261, 0.010488818399608135, -0.02235833741724491, -0.00496672373265028, -0.0008841739036142826, 0.0019337058765813708, 0.016044721007347107, -0.02218416891992092, -0.0053303916938602924, 0.01921093836426735, -0.012642554938793182, -0.021373819559812546, 0.011728109791874886, 0.020468514412641525, 0.05577414110302925, -0.0009694069158285856, 0.04059097170829773, -0.02179683931171894, -0.026009798049926758, -0.04639671742916107, -0.024962976574897766, -0.014504465274512768, -0.0220566987991333, -0.03474007174372673, -0.002774641616269946, 0.027694355696439743, 0.050290998071432114, 0.0032427459955215454, -0.023253049701452255, -0.04699283838272095, 0.04116019234061241, 0.020931316539645195, 0.019338443875312805, -0.05171588063240051, 0.005383999552577734, 0.008072840981185436, 0.03612837567925453, -0.006952731870114803, -0.03698558732867241, 0.017056601122021675, -0.0421205498278141, -0.04169953614473343, -0.05199119448661804, 0.008002465590834618, -0.06126873567700386, -0.005950680002570152, 0.02284565567970276, -0.023383231833577156, 0.01929771527647972, -0.023887041956186295, -0.011285330168902874, 0.006013786420226097, -0.009053424932062626, 0.015977272763848305, -0.010940823704004288, 0.08568454533815384, -0.07215405255556107, -0.021054301410913467, -0.0248736459761858, 0.01133863627910614, -0.030574915930628777, 0.04119018837809563, -0.005135617684572935, 0.01942487619817257, -0.05801719054579735, -0.006375058088451624, 0.04047420620918274, 0.0459258034825325, -1.911036451929249e-05, 0.00030676211463287473, -0.03143990784883499, 0.034778762608766556, 0.031034106388688087, -0.0039030632469803095, 0.05057138949632645, 0.005497979465872049, -0.04800192266702652, -0.002724874299019575, -0.010492730885744095, 0.004971216898411512, 0.02917555905878544, -0.009676370769739151, 0.015930138528347015, -0.09533631056547165, -0.025691652670502663, -0.023998329415917397, 0.0009392568608745933, -0.08232671767473221, 0.040197454392910004, 0.0007667712634429336, 0.00529611436650157, -0.03325199708342552, -0.0531618669629097, -0.032503001391887665, 0.01928878203034401, -0.00836867094039917, 0.007887274026870728, -0.06439164280891418, 0.02499997615814209, -0.026714514940977097, 0.0038523240946233273, 0.040690355002880096, -0.04664188623428345, 0.006983313709497452, -0.023159924894571304, 0.020506396889686584, -0.010959654115140438, -0.06715796887874603, 0.06896034628152847, -0.08469228446483612, -0.038469359278678894, 0.06704585999250412, -0.028775928542017937, -0.009676976129412651, -0.016521157696843147, 0.035746727138757706, -0.02713773399591446, -0.01144022960215807, -0.03221476078033447, 0.0852692574262619, -0.015971260145306587, -0.003657831344753504, 0.03357626497745514, 0.06840836256742477, -0.017202356830239296, -0.008342721499502659, 0.0035089135635644197, -0.02370384894311428, -0.04318295419216156, -0.0039469413459300995, -0.019475433975458145, 0.022524060681462288, 0.027003657072782516, -0.038293030112981796, 0.031019441783428192, -0.05147705227136612, -0.01628013141453266, 0.01737838052213192, 0.0252314992249012, -0.009901595301926136, 0.04354410618543625, 0.037365593016147614, -0.0286478940397501, -0.022008292376995087, -0.014669063501060009, 0.015262885950505733, 0.011050564236938953, -0.08895622938871384, 0.04134802147746086, -0.016268711537122726, 0.07376548647880554, 0.009304611012339592, 0.041516561061143875, -0.00583286676555872, 0.026974735781550407, 0.06593198329210281, 0.034738361835479736, 0.06453622877597809, -0.00207549799233675, -0.01164978463202715, 0.010376514866948128, -0.03948894143104553, 0.027864789590239525, 0.009509708732366562, 0.03852664679288864, -0.049469564110040665, -0.002015027217566967, -0.015586158260703087, 0.08130050450563431, 0.010879945009946823, 0.04154130443930626, -0.07157342880964279, -0.00017352669965475798, -0.029232444241642952, -0.040825944393873215, -0.005744085647165775, -0.08170077204704285, 0.039324529469013214, -0.007448403164744377, -0.041549306362867355, -0.048940032720565796, 0.04981829971075058, -0.012164590880274773, 0.015784665942192078, -0.05923778563737869, -0.025606177747249603, 0.05182402580976486, -0.017923628911376, 0.043175458908081055, -0.04361496865749359, -0.07970111072063446, 0.036119718104600906, -0.020883092656731606, -0.03686083108186722, 0.01086772233247757, -0.022523140534758568, -0.0078078387305140495, -0.012556010857224464, -0.04712480306625366, -0.017338477075099945, 0.019165020436048508, 0.016326624900102615, 0.0305654164403677, -0.014697655104100704, -0.022187815979123116, -0.05051249638199806, 0.06099841371178627, -0.05704561248421669, -0.011833279393613338, -0.005388813558965921, 0.04473598673939705, -0.015221077017486095, 0.05795009806752205, 0.02312096394598484, 0.0075258370488882065, -0.010232404805719852, -0.04368416965007782, -0.018344486132264137, 0.002154686488211155, 0.04324810951948166, 0.03754868358373642, -0.04392080008983612, -0.043402522802352905, -0.03687596321105957, -0.03518514335155487, 0.009493282064795494, -0.0041145095601677895, -0.004101223777979612, 0.008156840689480305, 0.00020232163660693914, -0.012252955697476864, -0.01926516555249691, 0.05196809768676758, -0.035034339874982834, 0.020020512863993645, -0.0158267579972744, 0.006052207201719284, 0.03241325914859772, 0.02777276746928692, 0.032777801156044006, -0.01226889993995428, 0.06021249294281006, 0.04282548651099205, -0.04650556668639183, -0.05326072871685028, 0.016466110944747925, -0.002826716983690858, 0.022099485620856285, -0.012507491745054722, 0.016525873914361, -0.011513783596456051, -0.01726444996893406, -0.037607740610837936, 0.059679411351680756, 0.02615354210138321, -0.08533825725317001, -0.012763634324073792, -0.03461074456572533, -0.0070082577876746655, -0.01810484007000923, 0.019929876551032066, -0.050364892929792404, -0.022224780172109604, 0.022669365629553795, 0.04501258209347725, -0.011874476447701454, -0.0026172054931521416, -0.07258472591638565, 0.03750914707779884, -0.031652405858039856, -0.007753782439976931, -0.021108277142047882, -0.023893136531114578, 0.024009691551327705, 0.021072732284665108, 0.03558477386832237, 0.025773843750357628, -0.017213836312294006, -0.028489870950579643, 0.023708000779151917, 0.007439128588885069, 0.0071917143650352955, 0.03626685217022896, -0.03297760710120201, 0.06409215927124023, 0.03508260101079941, -0.0024931596126407385, 0.06305541098117828, -0.02701105922460556, 0.032071553170681, -0.06274864822626114, -0.0038116576615720987, 0.06042008101940155, -0.0268373042345047, -0.04468357190489769, -0.031169043853878975, 0.01351690199226141, -0.012788795866072178, 0.029781602323055267, 0.016967879608273506, 0.03223644942045212, -0.017847245559096336, 0.04072536900639534, 0.06372857093811035, -0.019674163311719894, -0.01857542246580124, 0.04360852390527725, 0.03253550082445145, 0.006603616289794445, -0.00363582419231534, -0.01563490927219391, -0.015182333067059517, 0.11372384428977966], [0.01605958677828312, 0.030003096908330917, 0.025761693716049194, -0.01957627572119236, -0.014232398942112923, -0.039433401077985764, -0.052275773137807846, -0.014058810658752918, -0.047918811440467834, -0.026954464614391327, 0.020939834415912628, 0.05790621042251587, -0.03408670797944069, -0.10953404009342194, -0.01096643041819334, -0.0011546972673386335, 0.007385305128991604, -0.05103980377316475, -0.04718507453799248, 0.017328854650259018, -0.04007676988840103, 0.033916473388671875, -0.04406605660915375, 0.019235199317336082, 0.003078594570979476, -0.060211047530174255, 0.015151334926486015, 0.02847081795334816, -0.026908403262495995, 0.014980873093008995, 0.03198567032814026, 0.012704729102551937, 0.01765180192887783, -0.027017798274755478, 0.03376148268580437, 0.017599033191800117, 0.0006646771216765046, -0.051346372812986374, 0.0033767514396458864, 0.0007753576501272619, -0.06594289094209671, 0.01464784611016512, 0.01948600821197033, -0.04875009134411812, 0.027182718738913536, 0.057590991258621216, 0.006650134455412626, -0.022266749292612076, 0.02270965650677681, -0.025416556745767593, -0.013004115782678127, -0.02796812728047371, -0.035526473075151443, 0.004798163194209337, -0.041392941027879715, -0.0062143937684595585, -0.02067559026181698, -0.010122218169271946, -0.004470644518733025, 0.056735098361968994, -0.0030726587865501642, 0.050774551928043365, 0.031308289617300034, 0.029799889773130417, 0.02511328086256981, 0.012285269796848297, 0.009048471227288246, 0.01319021638482809, -0.01524567510932684, 0.023246224969625473, -0.03671079874038696, 0.05592585355043411, -0.017969438806176186, -0.021297525614500046, -0.006332727149128914, 0.07894628494977951, 0.03936678543686867, -0.0032917980570346117, 0.0067716240882873535, -0.023956581950187683, -0.05155105143785477, 0.014824465848505497, -0.049760352820158005, 0.026639778167009354, -0.008751271292567253, -0.0020634792745113373, -0.022349676117300987, 0.026763714849948883, 0.00466413889080286, 0.06894848495721817, -0.01760917156934738, -0.04982415959239006, 0.004343515727669001, -0.021911734715104103, 0.008012738078832626, 0.033751390874385834, 0.0343516543507576, -0.004071456380188465, 0.0034615376498550177, -0.06703955680131912, 0.009893153794109821, 0.023672299459576607, -0.035914015024900436, -0.10384535044431686, -0.009181715548038483, -0.044446997344493866, 0.026389850303530693, -0.005818614736199379, 0.06103191152215004, -0.004870051518082619, 0.012830840423703194, -0.0002580583677627146, 0.008320637047290802, -0.003997201565653086, 0.02719683200120926, -0.06410416215658188, 0.05134265124797821, 0.10432656854391098, -0.018755408003926277, 0.02142322063446045, 0.051215361803770065, 0.05139292776584625, 0.009756091982126236, 0.03227861598134041, 0.014398263767361641, -0.0020580014679580927, 0.0531378872692585, 0.0009159184992313385, -0.006547098979353905, -0.003415537066757679, -0.0019452321575954556, 0.014107522554695606, 0.01856430619955063, -0.005017816089093685, 0.048606354743242264, -0.018450818955898285, 0.025498395785689354, -0.013584633357822895, 0.01436721533536911, -0.0987335741519928, 0.039527375251054764, -0.03573933243751526, 0.07827890664339066, -0.016760284081101418, -0.021811043843626976, 0.010827620513737202, -0.017727920785546303, 0.02219432219862938, 0.021236421540379524, 0.02544962801039219, 0.0017521633999422193, -0.03820648789405823, -0.015428191050887108, -0.013994590379297733, -0.039231836795806885, -0.015947986394166946, -0.03251771628856659, 0.005251395050436258, 0.0021717341151088476, 0.008047850802540779, 0.04468172416090965, -0.046100541949272156, -0.03328457102179527, 0.07985106855630875, -0.05657719075679779, 0.021974077448248863, 0.01636197417974472, -0.018787872046232224, 0.02302403375506401, 0.001346561941318214, 0.005723552778363228, -0.02386336773633957, -0.06379443407058716, -0.026563165709376335, 0.009104215539991856, -0.029887355864048004, 0.023516114801168442, 0.05269809812307358, -0.015986545011401176, 0.018460335209965706, 0.05164181813597679, 0.03029681369662285, 0.051446929574012756, 0.018740063533186913, -0.013391222804784775, 0.0059724608436226845, -0.05104639753699303, -0.07580331712961197, -0.007155367638915777, 0.0028088924009352922, 0.01954350806772709, -0.14265771210193634, 0.021858222782611847, 0.05931822955608368, 0.03949083760380745, -0.013747678138315678, 0.015068604610860348, 0.0615244135260582, -0.0037883021868765354, 0.01064202468842268, -0.01583140715956688, -0.02571197971701622, 0.023016374558210373, 0.00020406398107297719, -0.006049095652997494, 0.038436222821474075, 0.008713498711585999, -0.013489384204149246, 0.029147561639547348, 0.01318012923002243, -0.0006209524581208825, 0.03792524337768555, -0.006821766030043364, 0.0116606829687953, -0.0047430661506950855, 0.11011046916246414, 0.03162278234958649, -0.011813567019999027, -0.020395642146468163, 0.053935371339321136, -0.026562849059700966, 0.05341304838657379, -0.009197906590998173, -0.012361587956547737, 0.05019403249025345, -0.022244339808821678, -0.04746536538004875, -0.028129570186138153, 0.07748942077159882, 0.02555113099515438, 0.02642177604138851, 0.01180733647197485, 0.08250406384468079, 0.047254037111997604, -0.0026371462736278772, -0.03506022319197655, -0.044995296746492386, -0.0863545686006546, -0.026965122669935226, -0.019017649814486504, 0.024043947458267212, -0.029126202687621117, -0.039156485348939896, -0.019303753972053528, 0.04545186087489128, 0.058381836861371994, -0.038267627358436584, 0.005885488819330931, -0.02140345610678196, -0.017471347004175186, -0.012724500149488449, -0.054988645017147064, 0.03899760916829109, 0.02785051427781582, 0.003699934808537364, 0.010976732708513737, -0.055260948836803436, -0.040169648826122284, 0.02245563268661499, 0.011678988113999367, -0.0009886324405670166, -0.0025628400035202503, -0.0395442396402359, -0.021581528708338737, -0.025027014315128326, 0.011537627317011356, 0.06088942289352417, -0.009140100330114365, 0.04580448940396309, 0.009435770101845264, -0.04180280864238739, 0.03960627317428589, 0.038741424679756165, -0.026777828112244606, 0.002833585487678647, 0.0011217062128707767, -0.022083954885601997, -0.05315263196825981, -0.04205309599637985, 0.04562034830451012, -0.0072254217229783535, -0.02973032183945179, -0.0035180130507797003, -0.02704320289194584, -0.019567297771573067, 0.037788912653923035, 0.014769122935831547, -0.008519927971065044, 0.05081871524453163, -0.050189390778541565, 0.011257469654083252, 0.024421025067567825, -0.04868393391370773, -0.00934939831495285, -0.046377550810575485, -0.001001474098302424, -0.006582456640899181, 0.03402646631002426, 0.0014459394151344895, 0.023809539154171944, -0.08225651830434799, -0.01961502805352211, -0.040600262582302094, -0.017160983756184578, -0.04240107908844948, -0.023526383563876152, -0.04618926718831062, -0.02764553762972355, -0.004812351893633604, 0.03789021447300911, 0.01568782329559326, 0.052888188511133194, 0.03212286904454231, 0.017295517027378082, -0.016037840396165848, -0.02363922819495201, 0.05777455121278763, 0.02587471902370453, -0.02903411164879799, -0.03403814882040024, 0.03973119705915451, -0.009589065797626972, 0.03669728711247444, -0.011514349840581417, 0.004785229451954365, -0.014784551225602627, 0.005918135866522789, 0.035958871245384216, 0.04286293685436249, -0.09729185700416565, 0.01877903752028942, 0.016566818580031395, -0.02803219109773636, 0.01666433922946453, 0.02622883953154087, 0.07936946302652359, -0.0023852630984038115, 0.0032902322709560394, 0.035860516130924225, 0.01321091316640377, -0.03331108018755913, -0.02866492234170437, -0.06335082650184631, 0.025037037208676338, -0.06743777543306351, 0.022196639329195023, 0.019085779786109924, 0.07409270852804184, -0.04415537789463997, -0.035851191729307175, 0.004423481412231922, -0.032398343086242676, 0.043877262622117996, 0.00842997059226036, -0.010398142039775848, 0.010580291971564293, 0.003006142796948552, 0.03484474495053291, -0.01980593055486679, 0.004296119790524244, 0.034706272184848785, 0.07839873433113098, -0.014523761346936226, -0.03386479243636131, 0.02948944643139839, -0.012853787280619144, -0.06306281685829163, -0.12834767997264862, 0.05138631910085678, 0.05493517965078354, 0.011605002917349339, 0.001620253431610763, 0.026703089475631714, 0.03104560077190399, -0.036519814282655716, 0.045892030000686646, -0.025451917201280594, -0.05298776924610138, -0.019570142030715942, -0.060528386384248734, 0.04374101385474205, 0.005618659779429436, 0.045480333268642426, 0.04393331706523895, 0.0252686757594347, -0.0071691591292619705, 0.04249659180641174, 0.0022988133132457733, 0.002255932427942753, 0.00620277738198638, -0.007916491478681564, 0.04029126837849617, 0.0004234711523167789, 0.018702810630202293, -0.004431407433003187, -0.04686696454882622, -0.03670409694314003, -0.04104500636458397, -0.05747298523783684, 0.010836322791874409, 0.009353013709187508, 0.014638284221291542, -0.0032553623896092176, 0.016595209017395973, 0.01285529788583517, 0.015186245553195477, -0.01616000384092331, 0.0831158310174942, 0.01845732145011425, -0.008532090112566948, 0.021793320775032043, -0.0538671538233757, -0.013411097228527069, -0.02812410145998001, 0.003293363144621253, 0.001323531847447157, -0.032149460166692734, -0.02611570619046688, 0.0062676239758729935, 0.012438139878213406, -0.00458793668076396, -0.017148254439234734, 0.026388462632894516, -0.003561536781489849, -0.06742443889379501, -0.010806840844452381, 0.013313221745193005, -0.0010709688067436218, -0.025008849799633026, 0.04527439922094345, 0.011450549587607384, 0.025977201759815216, 0.00432526133954525, 0.04367512837052345, -0.042283330112695694, 0.08689726889133453, -0.04465695843100548, 0.05196625739336014, -0.034381914883852005, 0.07498005777597427, 0.013219290412962437, -0.030854566022753716, 0.009407620877027512, 0.05959205701947212, 0.00025402597384527326, -0.004722670651972294, -0.023822687566280365, -0.011127411387860775, 0.06774483621120453, 0.02349906787276268, 0.012559846974909306, 0.05584375932812691, -0.0027014920487999916, 0.03822506591677666, 0.001778472913429141, 0.003135145176202059, -0.004898448940366507, -0.04109407588839531, -0.010921472683548927, -0.0023536847438663244, -0.005467038601636887, 0.02844863198697567, -0.02816242352128029, 0.00910878274589777, -0.0005301610799506307, -0.020260831341147423, -0.011837259866297245, 0.0074048638343811035, 0.0330076701939106, 0.04631241410970688, 0.006901032757014036, 0.03778628259897232, -0.03253823518753052, -0.04035308584570885, -0.009153680875897408, -0.04743404686450958, -0.023646878078579903, -0.01274019479751587, -0.023659851402044296, -0.009903552010655403, 0.027719231322407722, 0.03643347695469856, -0.012292752042412758, 0.00797509029507637, -0.012398775666952133, 0.053438544273376465, 0.009874007664620876, 0.022641755640506744, -0.018230164423584938, -0.015220103785395622, 0.011325131170451641, 0.036286234855651855, -0.003910931758582592, -0.014778620563447475, 0.023624222725629807, -0.046514395624399185, -0.04348434507846832, -0.07065105438232422, 0.01178349182009697, -0.04983343929052353, -0.019145935773849487, 0.03206673264503479, -0.02248401753604412, 0.009849278256297112, -0.01454122457653284, 0.00698584271594882, 0.004334441851824522, -0.023265566676855087, 0.003949882462620735, -0.006641953252255917, 0.08778787404298782, -0.0813477486371994, -0.009150204248726368, 0.0004730928922072053, -0.02105945535004139, 0.0011214343830943108, 0.03659337759017944, 0.0039810677990317345, -0.0014178204583004117, -0.07788791507482529, -0.026244698092341423, 0.0048441207036376, 0.04476112127304077, 0.0004692444927059114, 0.0025287142489105463, -0.03630146384239197, 0.023211196064949036, 0.019544731825590134, -0.014876898378133774, 0.05812135338783264, 0.001799705671146512, -0.023032745346426964, -0.0072522531263530254, -0.005533810704946518, -0.004055136349052191, 0.0029441500082612038, -0.0026572630740702152, 0.02507956698536873, -0.07689077407121658, -0.02373884804546833, -0.035312335938215256, 0.0028323126025497913, -0.07526563107967377, 0.039079222828149796, -0.009688976220786572, 0.01521303690969944, -0.041489288210868835, -0.0774708166718483, -0.022667212411761284, 0.012534103356301785, -0.01208994910120964, 0.013675891794264317, -0.07183263450860977, 0.035214927047491074, -0.00638936460018158, -0.003275875002145767, 0.028514647856354713, -0.039225272834300995, -0.010135849937796593, -0.013547512702643871, -0.007063989527523518, -0.0006645081448368728, -0.08245290070772171, 0.06152914837002754, -0.08848331868648529, -0.047584373503923416, 0.06486338376998901, -0.0010509552666917443, -0.00958198681473732, -0.015381206758320332, 0.012677489779889584, -0.02541586197912693, -0.010829906910657883, -0.040315985679626465, 0.07084985077381134, -0.004795122891664505, 0.02392730861902237, 0.028320785611867905, 0.05429540574550629, -0.0022732384968549013, 0.013126975856721401, -0.036959439516067505, 0.011621418409049511, -0.060289300978183746, -0.012116503901779652, -0.03767351806163788, 0.015614232048392296, 0.013533436693251133, -0.03253301605582237, 0.0046608224511146545, -0.05888301506638527, -0.01786467805504799, 0.007653253152966499, 0.04693540930747986, -0.011819086037576199, 0.05970395728945732, 0.05808683857321739, -0.06625030189752579, 0.007262212224304676, -0.029628954827785492, -0.009970931336283684, 0.00799498800188303, -0.07201215624809265, 0.03358093649148941, -0.008195148780941963, 0.07860071212053299, 0.02052946388721466, 0.045350801199674606, -0.003071084851399064, 0.02013092301785946, 0.056559789925813675, 0.058278631418943405, 0.07482638955116272, 0.008291359059512615, 0.009787806309759617, -0.004322321619838476, -0.04185561463236809, 0.008748067542910576, 0.011986888013780117, 0.016066046431660652, -0.06783432513475418, -0.031172003597021103, 0.0015492119127884507, 0.058813706040382385, 0.014383298344910145, 0.06697336584329605, -0.05368299409747124, -0.02960067428648472, -0.047120142728090286, -0.04955565929412842, -0.005217657424509525, -0.09007036685943604, 0.031999994069337845, 0.010862707160413265, -0.013950289227068424, -0.03760684281587601, 0.038814835250377655, -0.013143139891326427, 0.015170537866652012, -0.038332752883434296, -0.026235496625304222, 0.042313672602176666, -0.045916978269815445, 0.028005585074424744, -0.049779921770095825, -0.07663951069116592, 0.03228982165455818, -0.025928109884262085, -0.04661466181278229, 0.0002908499154727906, -0.0021999541204422712, 0.0007238005637191236, -0.03531128168106079, -0.061222873628139496, -0.017476916313171387, -0.01740298978984356, 0.0411699004471302, 0.03663098067045212, -0.012567172758281231, -0.02384207397699356, -0.0495716892182827, 0.05265861749649048, -0.06794695556163788, -0.0032901072409003973, -0.004602099768817425, 0.021596305072307587, -0.015613319352269173, 0.045095719397068024, 0.023207608610391617, 0.015219530090689659, -0.016743505373597145, -0.050692327320575714, -0.03267280384898186, -0.004210365936160088, 0.0453140065073967, 0.045061614364385605, -0.03571528568863869, -0.03312259912490845, -0.04649952054023743, -0.030008066445589066, 0.0033748000860214233, -0.02008763700723648, 0.051922667771577835, 0.025600478053092957, -0.002138269366696477, -0.011598409153521061, -0.027945205569267273, 0.057059578597545624, -0.03662896528840065, 0.03476609289646149, -0.011600067839026451, 0.01249693613499403, 0.05066721513867378, 0.05829807370901108, 0.03269385173916817, -0.02920982986688614, 0.045436467975378036, 0.054977964609861374, -0.041667286306619644, -0.038430389016866684, -0.0009722664253786206, 0.012332976795732975, 0.005552294664084911, -0.028455253690481186, 0.052578654140233994, -0.0014285242650657892, -0.02592667192220688, -0.03244926780462265, 0.06457433104515076, 0.027424132451415062, -0.1045263484120369, -0.014906848780810833, -0.029855897650122643, 0.012489397078752518, -0.013975397683680058, 0.03069283626973629, -0.05032973736524582, -0.03481477126479149, 0.014706984162330627, 0.029036609455943108, -0.030401604250073433, 0.0070937820710241795, -0.07252137362957001, 0.04064387083053589, -0.0013125359546393156, 0.005951697938144207, -0.01268274150788784, -0.014818329364061356, 0.015600583516061306, 0.008206700906157494, 0.03827398642897606, 0.03186425194144249, -0.009759020991623402, -0.030846377834677696, 0.037899427115917206, -0.012731331400573254, 0.000188045363756828, 0.03360197693109512, -0.03826137259602547, 0.07449356466531754, 0.001238143420778215, -0.007641963195055723, 0.06467071920633316, -0.018405135720968246, 0.046564701944589615, -0.04106247425079346, -0.021856514737010002, 0.07621057331562042, -0.04304991289973259, -0.04306909069418907, -0.0304432176053524, 0.01527604553848505, -0.005707693751901388, 0.038819730281829834, 0.012942232191562653, 0.01949952356517315, -0.021732443943619728, 0.03682343289256096, 0.061600372195243835, -0.020474111661314964, -0.012682368978857994, 0.04342878982424736, 0.04062390327453613, 0.011738210916519165, 0.023332543671131134, 0.005282609257847071, -0.01856761984527111, 0.0955662876367569]]"
|
606
|
+
]
|
607
|
+
},
|
608
|
+
"execution_count": 15,
|
609
|
+
"metadata": {},
|
610
|
+
"output_type": "execute_result"
|
611
|
+
}
|
612
|
+
],
|
613
|
+
"source": [
|
614
|
+
"documents_embeddings = JSON.load(File.read(\"embeddings.json\"))"
|
615
|
+
]
|
616
|
+
},
|
617
|
+
{
|
618
|
+
"cell_type": "code",
|
619
|
+
"execution_count": 16,
|
620
|
+
"id": "1ac8bbf3",
|
621
|
+
"metadata": {},
|
622
|
+
"outputs": [
|
623
|
+
{
|
624
|
+
"data": {
|
625
|
+
"text/plain": [
|
626
|
+
"[#<Chroma::Resources::Embedding:0x00000001049957f0 @id=\"92118736-5484-4da4-b669-cde1cc03c369\", @embedding=[0.03228979930281639, 0.04374200478196144, 0.014979278668761253, -0.008304460905492306, -0.0036412617191672325, -0.04301042854785919, -0.08898357301950455, 0.010292756371200085, -0.04601152241230011, -0.01627073809504509, 0.0423235185444355, 0.035671770572662354, -0.03516373038291931, -0.10667434334754944, -0.0305192731320858, -0.00317957391962409, 0.00898097362369299, -0.04044022411108017, -0.03859616443514824, -2.847463656507898e-05, -0.02551359124481678, 0.015683457255363464, -0.03809453547000885, 0.026084054261446, 0.004630913957953453, -0.05073123052716255, -0.005383108276873827, 0.00900851096957922, -0.022124305367469788, 0.0008553847437724471, 0.03406482934951782, 0.006923138629645109, 0.016204921528697014, -0.0248075183480978, 0.020003201439976692, 0.015132367610931396, -0.01962454617023468, -0.03499720245599747, 0.01819889433681965, -0.0026789242401719093, -0.06468355655670166, 0.023844322189688683, 0.012745407409965992, -0.03798774629831314, 0.03615935146808624, 0.07037360221147537, -0.01144226361066103, -0.008408664725720882, 0.01989738829433918, -0.03551855683326721, -0.009068832732737064, -0.011420144699513912, -0.053683359175920486, -0.004001090303063393, -0.05866784229874611, -0.028031479567289352, -0.04380376264452934, 0.0010980378137901425, -0.014896156266331673, 0.07904498279094696, -0.002795759355649352, 0.042044781148433685, 0.007605243939906359, 0.029185891151428223, -0.0007026402745395899, 0.027147360146045685, 0.03978419303894043, 0.02210610918700695, -0.012754594907164574, 0.004411553964018822, -0.045703720301389694, 0.03973427787423134, -0.04186716303229332, -0.03332607448101044, -0.0015854788944125175, 0.09185648709535599, 0.03097948245704174, 0.008720682002604008, 0.011866453103721142, -0.025312529876828194, -0.04079953953623772, 0.004299015738070011, -0.07025784999132156, 0.01059478335082531, -0.005177331157028675, 0.01095830462872982, -0.014626510441303253, 0.03253486379981041, 0.018911102786660194, 0.058727703988552094, -0.02561543881893158, -0.051282405853271484, -0.001387969241477549, -0.007063963450491428, -0.017050521448254585, 0.03534478694200516, 0.03895848989486694, -0.002462860895320773, 0.013893542811274529, -0.08051256835460663, 0.027548780664801598, 0.03188430890440941, -0.028614526614546776, -0.08799060434103012, -0.0124077582731843, -0.04059695079922676, 0.038384631276130676, 0.0369478240609169, 0.04113375023007393, -0.007888813503086567, 0.022122997790575027, 0.021126342937350273, 0.020651601254940033, -0.026094896718859673, 0.04217687249183655, -0.055210452526807785, 0.05842498317360878, 0.08628174662590027, -0.006704084575176239, -0.008585629053413868, 0.029757793992757797, 0.03337784856557846, 0.02287106402218342, 0.041264794766902924, 0.008918775245547295, 0.01518432330340147, 0.04925611615180969, -0.02022290974855423, 0.000812036101706326, -0.019539330154657364, -0.0007718283450230956, 0.006669742986559868, 0.0041087307035923, -0.0065430463291704655, 0.06524598598480225, -0.01344641949981451, 0.04301084205508232, -0.02710653841495514, 0.0224214568734169, -0.09976571053266525, 0.021154599264264107, -0.05801623687148094, 0.07450253516435623, -0.011398211121559143, -0.0265400018543005, 0.0035808708053082228, -0.021095940843224525, 0.03220225125551224, 0.010811158455908298, 0.02507082000374794, -0.004476296249777079, -0.021498845890164375, 0.004178662318736315, -0.012609281577169895, -0.04164125397801399, -0.016675475984811783, -0.05637894198298454, -0.00039760651998221874, 0.013929449021816254, 0.015408114530146122, 0.043402332812547684, -0.04887295141816139, -0.029663026332855225, 0.07050147652626038, -0.07254905253648758, 0.011923249810934067, 0.03870907053351402, -0.02336050756275654, 0.012871748767793179, 0.02158389799296856, 0.020876718685030937, -0.007757823448628187, -0.0452481284737587, -0.03634555637836456, 0.0010483156656846404, -0.023808754980564117, 0.01596495322883129, 0.0714854747056961, 0.0018134007696062326, 0.010900470428168774, 0.03798305243253708, 0.019611285999417305, 0.044536292552948, 0.04413134977221489, -0.0038643558509647846, 0.016711169853806496, -0.04104889929294586, -0.051808178424835205, -0.005617272108793259, 0.017367791384458542, 0.02081133984029293, -0.1284034550189972, 0.04220516234636307, 0.05191464722156525, 0.04524064436554909, 0.009792013093829155, -0.008229931816458702, 0.07202937453985214, -0.016330188140273094, 0.016698677092790604, 0.012947555631399155, 0.0011996577959507704, 0.0176108218729496, 0.01835097186267376, -0.002980436198413372, 0.03625079616904259, 0.007594248279929161, 0.016943421214818954, 0.00224352371878922, 0.004125209059566259, 0.005772887729108334, 0.020054487511515617, -0.0021010898053646088, 0.023027975112199783, -0.028509322553873062, 0.12751130759716034, 0.021971622481942177, -0.003281302284449339, -0.018533241003751755, 0.0691463053226471, -0.007271943148225546, 0.02029179222881794, 0.010470682755112648, 0.005038999952375889, 0.062135156244039536, -0.04069601371884346, -0.03258826583623886, -0.02782304212450981, 0.08235930651426315, 0.016592130064964294, 0.00679009361192584, 0.01324023399502039, 0.06185121089220047, 0.04765875265002251, -0.011462446302175522, -0.041887979954481125, -0.030739666894078255, -0.06439483910799026, -0.03637808561325073, -0.03531050309538841, 0.0031687167938798666, -0.010794159024953842, -0.033315885812044144, -0.008723964914679527, 0.032397761940956116, 0.0710805356502533, -0.022815288975834846, 0.03034850023686886, -0.0049735307693481445, -0.018393810838460922, -0.03242115303874016, -0.06379392743110657, 0.032948728650808334, 0.025140387937426567, 0.00576405692845583, 0.014179369434714317, -0.062080904841423035, -0.06615299731492996, -0.003071098355576396, 0.01967025175690651, -0.03492140397429466, -0.007880707271397114, -0.057441577315330505, 0.002134165959432721, -0.025045592337846756, -0.008549192920327187, 0.048091255128383636, -0.014099105261266232, 0.04910006374120712, 0.010198627598583698, -0.029759423807263374, 0.04051714017987251, 0.06776628643274307, -0.034147296100854874, -0.026143042370676994, -0.006945130880922079, -0.008138955570757389, -0.05489006265997887, -0.04387400299310684, 0.046449482440948486, -0.019213583320379257, -0.022300662472844124, -0.008200730197131634, -0.03233560174703598, 0.003402227535843849, 0.011234400793910027, -0.002742406213656068, -0.02069772034883499, 0.038628507405519485, -0.04607287794351578, 0.016672592610120773, 0.04447432607412338, -0.033386338502168655, -0.007463876157999039, -0.052367787808179855, -0.013385064899921417, -0.022224988788366318, 0.04003913700580597, 0.013125717639923096, -0.0002900198451243341, -0.056771036237478256, -0.021361535415053368, -0.032924383878707886, -0.036289069801568985, -0.01641670987010002, -0.036186669021844864, -0.03189323469996452, -0.006797273643314838, 0.004788490012288094, 0.0068277702666819096, 0.02284691296517849, 0.059777747839689255, 0.04041121155023575, 0.019820593297481537, -0.050160445272922516, -0.04713621735572815, 0.04364219307899475, 0.02694518491625786, -0.015432613901793957, -0.029616745188832283, 0.07202371954917908, -0.011115726083517075, 0.015347307547926903, 0.014280633069574833, -0.002249254612252116, 0.019911013543605804, 0.00929440651088953, 0.020636478438973427, 0.028549201786518097, -0.10426840931177139, 0.028348496183753014, 0.04307335987687111, 0.006135597825050354, 0.011010325513780117, 0.03592953458428383, 0.05262245237827301, -0.004868381656706333, 0.01509394496679306, 0.038539350032806396, 0.0036197768058627844, -0.03548605740070343, -0.013938318938016891, -0.07516362518072128, 0.019009025767445564, -0.07927123457193375, 0.030785150825977325, 0.01724003255367279, 0.10079476982355118, -0.05789526179432869, -0.02340284176170826, 0.009072965942323208, -0.008314074948430061, 0.049702033400535583, 0.024742692708969116, 0.015528418123722076, 0.00030811724718660116, 0.027656322345137596, 0.023148810490965843, -0.006079103332012892, 0.02465176209807396, 0.06162107363343239, 0.06096961349248886, -0.00870341807603836, -0.056443870067596436, 0.026660898700356483, -0.004470273852348328, -0.04705857113003731, -0.10955272614955902, 0.03670618310570717, 0.04034575819969177, 0.0022876777220517397, 0.022575348615646362, 0.026446910575032234, 0.039642687886953354, -0.04091582074761391, 0.06343641877174377, -0.0324559323489666, -0.059108342975378036, -0.032128505408763885, -0.03802308812737465, 0.03290381282567978, -0.0035157541278749704, 0.04257664084434509, 0.030607890337705612, 0.03177213296294212, 0.006457501091063023, 0.043441928923130035, -0.009309722110629082, 0.01678484119474888, -0.007282345090061426, 0.006767678540199995, 0.047442350536584854, -0.006221396382898092, 0.03569779172539711, 0.01492008101195097, -0.03090090863406658, -0.05462432652711868, -0.03663143143057823, -0.039276741445064545, 0.003162890672683716, -0.006974783726036549, -0.01605837605893612, -0.007104011252522469, -0.00497934827581048, 0.01746319606900215, 0.009480826556682587, -0.021224495023489, 0.07256880402565002, 0.011976632289588451, -0.005190932657569647, 0.0137846814468503, -0.04294455423951149, -0.004575804807245731, -0.029106736183166504, -0.00396405765786767, 0.0162188820540905, -0.05301636457443237, -0.02794911526143551, 0.01211538352072239, 0.011636774055659771, 0.001118947984650731, -0.030082430690526962, 0.03802220895886421, 0.0018671602010726929, -0.04864092543721199, 0.010714356787502766, 0.019209057092666626, 0.01017305813729763, -0.023894254118204117, 0.06532895565032959, 0.0342823825776577, 0.0030650184489786625, 0.012038799934089184, 0.036395054310560226, -0.053046561777591705, 0.11037362366914749, -0.039846278727054596, 0.03756939247250557, -0.02227490022778511, 0.07383814454078674, 0.025581585243344307, -0.019945910200476646, 0.0238353181630373, 0.05056775361299515, 0.03472975641489029, -0.01490088738501072, 0.008003530092537403, -0.0071889315731823444, 0.06521737575531006, 0.03012000396847725, 0.029991764575242996, 0.04536479339003563, -0.041474953293800354, 0.040479108691215515, 0.013666344806551933, -0.03651740401983261, 0.010488818399608135, -0.02235833741724491, -0.00496672373265028, -0.0008841739036142826, 0.0019337058765813708, 0.016044721007347107, -0.02218416891992092, -0.0053303916938602924, 0.01921093836426735, -0.012642554938793182, -0.021373819559812546, 0.011728109791874886, 0.020468514412641525, 0.05577414110302925, -0.0009694069158285856, 0.04059097170829773, -0.02179683931171894, -0.026009798049926758, -0.04639671742916107, -0.024962976574897766, -0.014504465274512768, -0.0220566987991333, -0.03474007174372673, -0.002774641616269946, 0.027694355696439743, 0.050290998071432114, 0.0032427459955215454, -0.023253049701452255, -0.04699283838272095, 0.04116019234061241, 0.020931316539645195, 0.019338443875312805, -0.05171588063240051, 0.005383999552577734, 0.008072840981185436, 0.03612837567925453, -0.006952731870114803, -0.03698558732867241, 0.017056601122021675, -0.0421205498278141, -0.04169953614473343, -0.05199119448661804, 0.008002465590834618, -0.06126873567700386, -0.005950680002570152, 0.02284565567970276, -0.023383231833577156, 0.01929771527647972, -0.023887041956186295, -0.011285330168902874, 0.006013786420226097, -0.009053424932062626, 0.015977272763848305, -0.010940823704004288, 0.08568454533815384, -0.07215405255556107, -0.021054301410913467, -0.0248736459761858, 0.01133863627910614, -0.030574915930628777, 0.04119018837809563, -0.005135617684572935, 0.01942487619817257, -0.05801719054579735, -0.006375058088451624, 0.04047420620918274, 0.0459258034825325, -1.911036451929249e-05, 0.00030676211463287473, -0.03143990784883499, 0.034778762608766556, 0.031034106388688087, -0.0039030632469803095, 0.05057138949632645, 0.005497979465872049, -0.04800192266702652, -0.002724874299019575, -0.010492730885744095, 0.004971216898411512, 0.02917555905878544, -0.009676370769739151, 0.015930138528347015, -0.09533631056547165, -0.025691652670502663, -0.023998329415917397, 0.0009392568608745933, -0.08232671767473221, 0.040197454392910004, 0.0007667712634429336, 0.00529611436650157, -0.03325199708342552, -0.0531618669629097, -0.032503001391887665, 0.01928878203034401, -0.00836867094039917, 0.007887274026870728, -0.06439164280891418, 0.02499997615814209, -0.026714514940977097, 0.0038523240946233273, 0.040690355002880096, -0.04664188623428345, 0.006983313709497452, -0.023159924894571304, 0.020506396889686584, -0.010959654115140438, -0.06715796887874603, 0.06896034628152847, -0.08469228446483612, -0.038469359278678894, 0.06704585999250412, -0.028775928542017937, -0.009676976129412651, -0.016521157696843147, 0.035746727138757706, -0.02713773399591446, -0.01144022960215807, -0.03221476078033447, 0.0852692574262619, -0.015971260145306587, -0.003657831344753504, 0.03357626497745514, 0.06840836256742477, -0.017202356830239296, -0.008342721499502659, 0.0035089135635644197, -0.02370384894311428, -0.04318295419216156, -0.0039469413459300995, -0.019475433975458145, 0.022524060681462288, 0.027003657072782516, -0.038293030112981796, 0.031019441783428192, -0.05147705227136612, -0.01628013141453266, 0.01737838052213192, 0.0252314992249012, -0.009901595301926136, 0.04354410618543625, 0.037365593016147614, -0.0286478940397501, -0.022008292376995087, -0.014669063501060009, 0.015262885950505733, 0.011050564236938953, -0.08895622938871384, 0.04134802147746086, -0.016268711537122726, 0.07376548647880554, 0.009304611012339592, 0.041516561061143875, -0.00583286676555872, 0.026974735781550407, 0.06593198329210281, 0.034738361835479736, 0.06453622877597809, -0.00207549799233675, -0.01164978463202715, 0.010376514866948128, -0.03948894143104553, 0.027864789590239525, 0.009509708732366562, 0.03852664679288864, -0.049469564110040665, -0.002015027217566967, -0.015586158260703087, 0.08130050450563431, 0.010879945009946823, 0.04154130443930626, -0.07157342880964279, -0.00017352669965475798, -0.029232444241642952, -0.040825944393873215, -0.005744085647165775, -0.08170077204704285, 0.039324529469013214, -0.007448403164744377, -0.041549306362867355, -0.048940032720565796, 0.04981829971075058, -0.012164590880274773, 0.015784665942192078, -0.05923778563737869, -0.025606177747249603, 0.05182402580976486, -0.017923628911376, 0.043175458908081055, -0.04361496865749359, -0.07970111072063446, 0.036119718104600906, -0.020883092656731606, -0.03686083108186722, 0.01086772233247757, -0.022523140534758568, -0.0078078387305140495, -0.012556010857224464, -0.04712480306625366, -0.017338477075099945, 0.019165020436048508, 0.016326624900102615, 0.0305654164403677, -0.014697655104100704, -0.022187815979123116, -0.05051249638199806, 0.06099841371178627, -0.05704561248421669, -0.011833279393613338, -0.005388813558965921, 0.04473598673939705, -0.015221077017486095, 0.05795009806752205, 0.02312096394598484, 0.0075258370488882065, -0.010232404805719852, -0.04368416965007782, -0.018344486132264137, 0.002154686488211155, 0.04324810951948166, 0.03754868358373642, -0.04392080008983612, -0.043402522802352905, -0.03687596321105957, -0.03518514335155487, 0.009493282064795494, -0.0041145095601677895, -0.004101223777979612, 0.008156840689480305, 0.00020232163660693914, -0.012252955697476864, -0.01926516555249691, 0.05196809768676758, -0.035034339874982834, 0.020020512863993645, -0.0158267579972744, 0.006052207201719284, 0.03241325914859772, 0.02777276746928692, 0.032777801156044006, -0.01226889993995428, 0.06021249294281006, 0.04282548651099205, -0.04650556668639183, -0.05326072871685028, 0.016466110944747925, -0.002826716983690858, 0.022099485620856285, -0.012507491745054722, 0.016525873914361, -0.011513783596456051, -0.01726444996893406, -0.037607740610837936, 0.059679411351680756, 0.02615354210138321, -0.08533825725317001, -0.012763634324073792, -0.03461074456572533, -0.0070082577876746655, -0.01810484007000923, 0.019929876551032066, -0.050364892929792404, -0.022224780172109604, 0.022669365629553795, 0.04501258209347725, -0.011874476447701454, -0.0026172054931521416, -0.07258472591638565, 0.03750914707779884, -0.031652405858039856, -0.007753782439976931, -0.021108277142047882, -0.023893136531114578, 0.024009691551327705, 0.021072732284665108, 0.03558477386832237, 0.025773843750357628, -0.017213836312294006, -0.028489870950579643, 0.023708000779151917, 0.007439128588885069, 0.0071917143650352955, 0.03626685217022896, -0.03297760710120201, 0.06409215927124023, 0.03508260101079941, -0.0024931596126407385, 0.06305541098117828, -0.02701105922460556, 0.032071553170681, -0.06274864822626114, -0.0038116576615720987, 0.06042008101940155, -0.0268373042345047, -0.04468357190489769, -0.031169043853878975, 0.01351690199226141, -0.012788795866072178, 0.029781602323055267, 0.016967879608273506, 0.03223644942045212, -0.017847245559096336, 0.04072536900639534, 0.06372857093811035, -0.019674163311719894, -0.01857542246580124, 0.04360852390527725, 0.03253550082445145, 0.006603616289794445, -0.00363582419231534, -0.01563490927219391, -0.015182333067059517, 0.11372384428977966], @metadata={:source=>\"ruby.txt\"}, @document=\"Array#any?\\n\\nany? → true or false\\nany? {|element| ... } → true or false\\nany?(obj) → true or false\\n\\nReturns true if any element of self meets a given criterion.\\n\\nWith no block given and no argument, returns true if self has any truthy element, false otherwise:\\n\\n[nil, 0, false].any? # => true\\n[nil, false].any? # => false\\n[].any? # => false\\nWith a block given and no argument, calls the block with each element in self; returns true if the block returns any truthy value, false otherwise:\\n\\n[0, 1, 2].any? {|element| element > 1 } # => true\\n[0, 1, 2].any? {|element| element > 2 } # => false\\nIf argument obj is given, returns true if obj.=== any element, false otherwise:\\n\\n['food', 'drink'].any?(/foo/) # => true\\n['food', 'drink'].any?(/bar/) # => false\\n[].any?(/foo/) # => false\\n[0, 1, 2].any?(1) # => true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns\", @distance=nil>, #<Chroma::Resources::Embedding:0x0000000104995750 @id=\"8a63e0b7-9211-443a-b7f7-9ae387348749\", @embedding=[0.01605958677828312, 0.030003096908330917, 0.025761693716049194, -0.01957627572119236, -0.014232398942112923, -0.039433401077985764, -0.052275773137807846, -0.014058810658752918, -0.047918811440467834, -0.026954464614391327, 0.020939834415912628, 0.05790621042251587, -0.03408670797944069, -0.10953404009342194, -0.01096643041819334, -0.0011546972673386335, 0.007385305128991604, -0.05103980377316475, -0.04718507453799248, 0.017328854650259018, -0.04007676988840103, 0.033916473388671875, -0.04406605660915375, 0.019235199317336082, 0.003078594570979476, -0.060211047530174255, 0.015151334926486015, 0.02847081795334816, -0.026908403262495995, 0.014980873093008995, 0.03198567032814026, 0.012704729102551937, 0.01765180192887783, -0.027017798274755478, 0.03376148268580437, 0.017599033191800117, 0.0006646771216765046, -0.051346372812986374, 0.0033767514396458864, 0.0007753576501272619, -0.06594289094209671, 0.01464784611016512, 0.01948600821197033, -0.04875009134411812, 0.027182718738913536, 0.057590991258621216, 0.006650134455412626, -0.022266749292612076, 0.02270965650677681, -0.025416556745767593, -0.013004115782678127, -0.02796812728047371, -0.035526473075151443, 0.004798163194209337, -0.041392941027879715, -0.0062143937684595585, -0.02067559026181698, -0.010122218169271946, -0.004470644518733025, 0.056735098361968994, -0.0030726587865501642, 0.050774551928043365, 0.031308289617300034, 0.029799889773130417, 0.02511328086256981, 0.012285269796848297, 0.009048471227288246, 0.01319021638482809, -0.01524567510932684, 0.023246224969625473, -0.03671079874038696, 0.05592585355043411, -0.017969438806176186, -0.021297525614500046, -0.006332727149128914, 0.07894628494977951, 0.03936678543686867, -0.0032917980570346117, 0.0067716240882873535, -0.023956581950187683, -0.05155105143785477, 0.014824465848505497, -0.049760352820158005, 0.026639778167009354, -0.008751271292567253, -0.0020634792745113373, -0.022349676117300987, 0.026763714849948883, 0.00466413889080286, 0.06894848495721817, -0.01760917156934738, -0.04982415959239006, 0.004343515727669001, -0.021911734715104103, 0.008012738078832626, 0.033751390874385834, 0.0343516543507576, -0.004071456380188465, 0.0034615376498550177, -0.06703955680131912, 0.009893153794109821, 0.023672299459576607, -0.035914015024900436, -0.10384535044431686, -0.009181715548038483, -0.044446997344493866, 0.026389850303530693, -0.005818614736199379, 0.06103191152215004, -0.004870051518082619, 0.012830840423703194, -0.0002580583677627146, 0.008320637047290802, -0.003997201565653086, 0.02719683200120926, -0.06410416215658188, 0.05134265124797821, 0.10432656854391098, -0.018755408003926277, 0.02142322063446045, 0.051215361803770065, 0.05139292776584625, 0.009756091982126236, 0.03227861598134041, 0.014398263767361641, -0.0020580014679580927, 0.0531378872692585, 0.0009159184992313385, -0.006547098979353905, -0.003415537066757679, -0.0019452321575954556, 0.014107522554695606, 0.01856430619955063, -0.005017816089093685, 0.048606354743242264, -0.018450818955898285, 0.025498395785689354, -0.013584633357822895, 0.01436721533536911, -0.0987335741519928, 0.039527375251054764, -0.03573933243751526, 0.07827890664339066, -0.016760284081101418, -0.021811043843626976, 0.010827620513737202, -0.017727920785546303, 0.02219432219862938, 0.021236421540379524, 0.02544962801039219, 0.0017521633999422193, -0.03820648789405823, -0.015428191050887108, -0.013994590379297733, -0.039231836795806885, -0.015947986394166946, -0.03251771628856659, 0.005251395050436258, 0.0021717341151088476, 0.008047850802540779, 0.04468172416090965, -0.046100541949272156, -0.03328457102179527, 0.07985106855630875, -0.05657719075679779, 0.021974077448248863, 0.01636197417974472, -0.018787872046232224, 0.02302403375506401, 0.001346561941318214, 0.005723552778363228, -0.02386336773633957, -0.06379443407058716, -0.026563165709376335, 0.009104215539991856, -0.029887355864048004, 0.023516114801168442, 0.05269809812307358, -0.015986545011401176, 0.018460335209965706, 0.05164181813597679, 0.03029681369662285, 0.051446929574012756, 0.018740063533186913, -0.013391222804784775, 0.0059724608436226845, -0.05104639753699303, -0.07580331712961197, -0.007155367638915777, 0.0028088924009352922, 0.01954350806772709, -0.14265771210193634, 0.021858222782611847, 0.05931822955608368, 0.03949083760380745, -0.013747678138315678, 0.015068604610860348, 0.0615244135260582, -0.0037883021868765354, 0.01064202468842268, -0.01583140715956688, -0.02571197971701622, 0.023016374558210373, 0.00020406398107297719, -0.006049095652997494, 0.038436222821474075, 0.008713498711585999, -0.013489384204149246, 0.029147561639547348, 0.01318012923002243, -0.0006209524581208825, 0.03792524337768555, -0.006821766030043364, 0.0116606829687953, -0.0047430661506950855, 0.11011046916246414, 0.03162278234958649, -0.011813567019999027, -0.020395642146468163, 0.053935371339321136, -0.026562849059700966, 0.05341304838657379, -0.009197906590998173, -0.012361587956547737, 0.05019403249025345, -0.022244339808821678, -0.04746536538004875, -0.028129570186138153, 0.07748942077159882, 0.02555113099515438, 0.02642177604138851, 0.01180733647197485, 0.08250406384468079, 0.047254037111997604, -0.0026371462736278772, -0.03506022319197655, -0.044995296746492386, -0.0863545686006546, -0.026965122669935226, -0.019017649814486504, 0.024043947458267212, -0.029126202687621117, -0.039156485348939896, -0.019303753972053528, 0.04545186087489128, 0.058381836861371994, -0.038267627358436584, 0.005885488819330931, -0.02140345610678196, -0.017471347004175186, -0.012724500149488449, -0.054988645017147064, 0.03899760916829109, 0.02785051427781582, 0.003699934808537364, 0.010976732708513737, -0.055260948836803436, -0.040169648826122284, 0.02245563268661499, 0.011678988113999367, -0.0009886324405670166, -0.0025628400035202503, -0.0395442396402359, -0.021581528708338737, -0.025027014315128326, 0.011537627317011356, 0.06088942289352417, -0.009140100330114365, 0.04580448940396309, 0.009435770101845264, -0.04180280864238739, 0.03960627317428589, 0.038741424679756165, -0.026777828112244606, 0.002833585487678647, 0.0011217062128707767, -0.022083954885601997, -0.05315263196825981, -0.04205309599637985, 0.04562034830451012, -0.0072254217229783535, -0.02973032183945179, -0.0035180130507797003, -0.02704320289194584, -0.019567297771573067, 0.037788912653923035, 0.014769122935831547, -0.008519927971065044, 0.05081871524453163, -0.050189390778541565, 0.011257469654083252, 0.024421025067567825, -0.04868393391370773, -0.00934939831495285, -0.046377550810575485, -0.001001474098302424, -0.006582456640899181, 0.03402646631002426, 0.0014459394151344895, 0.023809539154171944, -0.08225651830434799, -0.01961502805352211, -0.040600262582302094, -0.017160983756184578, -0.04240107908844948, -0.023526383563876152, -0.04618926718831062, -0.02764553762972355, -0.004812351893633604, 0.03789021447300911, 0.01568782329559326, 0.052888188511133194, 0.03212286904454231, 0.017295517027378082, -0.016037840396165848, -0.02363922819495201, 0.05777455121278763, 0.02587471902370453, -0.02903411164879799, -0.03403814882040024, 0.03973119705915451, -0.009589065797626972, 0.03669728711247444, -0.011514349840581417, 0.004785229451954365, -0.014784551225602627, 0.005918135866522789, 0.035958871245384216, 0.04286293685436249, -0.09729185700416565, 0.01877903752028942, 0.016566818580031395, -0.02803219109773636, 0.01666433922946453, 0.02622883953154087, 0.07936946302652359, -0.0023852630984038115, 0.0032902322709560394, 0.035860516130924225, 0.01321091316640377, -0.03331108018755913, -0.02866492234170437, -0.06335082650184631, 0.025037037208676338, -0.06743777543306351, 0.022196639329195023, 0.019085779786109924, 0.07409270852804184, -0.04415537789463997, -0.035851191729307175, 0.004423481412231922, -0.032398343086242676, 0.043877262622117996, 0.00842997059226036, -0.010398142039775848, 0.010580291971564293, 0.003006142796948552, 0.03484474495053291, -0.01980593055486679, 0.004296119790524244, 0.034706272184848785, 0.07839873433113098, -0.014523761346936226, -0.03386479243636131, 0.02948944643139839, -0.012853787280619144, -0.06306281685829163, -0.12834767997264862, 0.05138631910085678, 0.05493517965078354, 0.011605002917349339, 0.001620253431610763, 0.026703089475631714, 0.03104560077190399, -0.036519814282655716, 0.045892030000686646, -0.025451917201280594, -0.05298776924610138, -0.019570142030715942, -0.060528386384248734, 0.04374101385474205, 0.005618659779429436, 0.045480333268642426, 0.04393331706523895, 0.0252686757594347, -0.0071691591292619705, 0.04249659180641174, 0.0022988133132457733, 0.002255932427942753, 0.00620277738198638, -0.007916491478681564, 0.04029126837849617, 0.0004234711523167789, 0.018702810630202293, -0.004431407433003187, -0.04686696454882622, -0.03670409694314003, -0.04104500636458397, -0.05747298523783684, 0.010836322791874409, 0.009353013709187508, 0.014638284221291542, -0.0032553623896092176, 0.016595209017395973, 0.01285529788583517, 0.015186245553195477, -0.01616000384092331, 0.0831158310174942, 0.01845732145011425, -0.008532090112566948, 0.021793320775032043, -0.0538671538233757, -0.013411097228527069, -0.02812410145998001, 0.003293363144621253, 0.001323531847447157, -0.032149460166692734, -0.02611570619046688, 0.0062676239758729935, 0.012438139878213406, -0.00458793668076396, -0.017148254439234734, 0.026388462632894516, -0.003561536781489849, -0.06742443889379501, -0.010806840844452381, 0.013313221745193005, -0.0010709688067436218, -0.025008849799633026, 0.04527439922094345, 0.011450549587607384, 0.025977201759815216, 0.00432526133954525, 0.04367512837052345, -0.042283330112695694, 0.08689726889133453, -0.04465695843100548, 0.05196625739336014, -0.034381914883852005, 0.07498005777597427, 0.013219290412962437, -0.030854566022753716, 0.009407620877027512, 0.05959205701947212, 0.00025402597384527326, -0.004722670651972294, -0.023822687566280365, -0.011127411387860775, 0.06774483621120453, 0.02349906787276268, 0.012559846974909306, 0.05584375932812691, -0.0027014920487999916, 0.03822506591677666, 0.001778472913429141, 0.003135145176202059, -0.004898448940366507, -0.04109407588839531, -0.010921472683548927, -0.0023536847438663244, -0.005467038601636887, 0.02844863198697567, -0.02816242352128029, 0.00910878274589777, -0.0005301610799506307, -0.020260831341147423, -0.011837259866297245, 0.0074048638343811035, 0.0330076701939106, 0.04631241410970688, 0.006901032757014036, 0.03778628259897232, -0.03253823518753052, -0.04035308584570885, -0.009153680875897408, -0.04743404686450958, -0.023646878078579903, -0.01274019479751587, -0.023659851402044296, -0.009903552010655403, 0.027719231322407722, 0.03643347695469856, -0.012292752042412758, 0.00797509029507637, -0.012398775666952133, 0.053438544273376465, 0.009874007664620876, 0.022641755640506744, -0.018230164423584938, -0.015220103785395622, 0.011325131170451641, 0.036286234855651855, -0.003910931758582592, -0.014778620563447475, 0.023624222725629807, -0.046514395624399185, -0.04348434507846832, -0.07065105438232422, 0.01178349182009697, -0.04983343929052353, -0.019145935773849487, 0.03206673264503479, -0.02248401753604412, 0.009849278256297112, -0.01454122457653284, 0.00698584271594882, 0.004334441851824522, -0.023265566676855087, 0.003949882462620735, -0.006641953252255917, 0.08778787404298782, -0.0813477486371994, -0.009150204248726368, 0.0004730928922072053, -0.02105945535004139, 0.0011214343830943108, 0.03659337759017944, 0.0039810677990317345, -0.0014178204583004117, -0.07788791507482529, -0.026244698092341423, 0.0048441207036376, 0.04476112127304077, 0.0004692444927059114, 0.0025287142489105463, -0.03630146384239197, 0.023211196064949036, 0.019544731825590134, -0.014876898378133774, 0.05812135338783264, 0.001799705671146512, -0.023032745346426964, -0.0072522531263530254, -0.005533810704946518, -0.004055136349052191, 0.0029441500082612038, -0.0026572630740702152, 0.02507956698536873, -0.07689077407121658, -0.02373884804546833, -0.035312335938215256, 0.0028323126025497913, -0.07526563107967377, 0.039079222828149796, -0.009688976220786572, 0.01521303690969944, -0.041489288210868835, -0.0774708166718483, -0.022667212411761284, 0.012534103356301785, -0.01208994910120964, 0.013675891794264317, -0.07183263450860977, 0.035214927047491074, -0.00638936460018158, -0.003275875002145767, 0.028514647856354713, -0.039225272834300995, -0.010135849937796593, -0.013547512702643871, -0.007063989527523518, -0.0006645081448368728, -0.08245290070772171, 0.06152914837002754, -0.08848331868648529, -0.047584373503923416, 0.06486338376998901, -0.0010509552666917443, -0.00958198681473732, -0.015381206758320332, 0.012677489779889584, -0.02541586197912693, -0.010829906910657883, -0.040315985679626465, 0.07084985077381134, -0.004795122891664505, 0.02392730861902237, 0.028320785611867905, 0.05429540574550629, -0.0022732384968549013, 0.013126975856721401, -0.036959439516067505, 0.011621418409049511, -0.060289300978183746, -0.012116503901779652, -0.03767351806163788, 0.015614232048392296, 0.013533436693251133, -0.03253301605582237, 0.0046608224511146545, -0.05888301506638527, -0.01786467805504799, 0.007653253152966499, 0.04693540930747986, -0.011819086037576199, 0.05970395728945732, 0.05808683857321739, -0.06625030189752579, 0.007262212224304676, -0.029628954827785492, -0.009970931336283684, 0.00799498800188303, -0.07201215624809265, 0.03358093649148941, -0.008195148780941963, 0.07860071212053299, 0.02052946388721466, 0.045350801199674606, -0.003071084851399064, 0.02013092301785946, 0.056559789925813675, 0.058278631418943405, 0.07482638955116272, 0.008291359059512615, 0.009787806309759617, -0.004322321619838476, -0.04185561463236809, 0.008748067542910576, 0.011986888013780117, 0.016066046431660652, -0.06783432513475418, -0.031172003597021103, 0.0015492119127884507, 0.058813706040382385, 0.014383298344910145, 0.06697336584329605, -0.05368299409747124, -0.02960067428648472, -0.047120142728090286, -0.04955565929412842, -0.005217657424509525, -0.09007036685943604, 0.031999994069337845, 0.010862707160413265, -0.013950289227068424, -0.03760684281587601, 0.038814835250377655, -0.013143139891326427, 0.015170537866652012, -0.038332752883434296, -0.026235496625304222, 0.042313672602176666, -0.045916978269815445, 0.028005585074424744, -0.049779921770095825, -0.07663951069116592, 0.03228982165455818, -0.025928109884262085, -0.04661466181278229, 0.0002908499154727906, -0.0021999541204422712, 0.0007238005637191236, -0.03531128168106079, -0.061222873628139496, -0.017476916313171387, -0.01740298978984356, 0.0411699004471302, 0.03663098067045212, -0.012567172758281231, -0.02384207397699356, -0.0495716892182827, 0.05265861749649048, -0.06794695556163788, -0.0032901072409003973, -0.004602099768817425, 0.021596305072307587, -0.015613319352269173, 0.045095719397068024, 0.023207608610391617, 0.015219530090689659, -0.016743505373597145, -0.050692327320575714, -0.03267280384898186, -0.004210365936160088, 0.0453140065073967, 0.045061614364385605, -0.03571528568863869, -0.03312259912490845, -0.04649952054023743, -0.030008066445589066, 0.0033748000860214233, -0.02008763700723648, 0.051922667771577835, 0.025600478053092957, -0.002138269366696477, -0.011598409153521061, -0.027945205569267273, 0.057059578597545624, -0.03662896528840065, 0.03476609289646149, -0.011600067839026451, 0.01249693613499403, 0.05066721513867378, 0.05829807370901108, 0.03269385173916817, -0.02920982986688614, 0.045436467975378036, 0.054977964609861374, -0.041667286306619644, -0.038430389016866684, -0.0009722664253786206, 0.012332976795732975, 0.005552294664084911, -0.028455253690481186, 0.052578654140233994, -0.0014285242650657892, -0.02592667192220688, -0.03244926780462265, 0.06457433104515076, 0.027424132451415062, -0.1045263484120369, -0.014906848780810833, -0.029855897650122643, 0.012489397078752518, -0.013975397683680058, 0.03069283626973629, -0.05032973736524582, -0.03481477126479149, 0.014706984162330627, 0.029036609455943108, -0.030401604250073433, 0.0070937820710241795, -0.07252137362957001, 0.04064387083053589, -0.0013125359546393156, 0.005951697938144207, -0.01268274150788784, -0.014818329364061356, 0.015600583516061306, 0.008206700906157494, 0.03827398642897606, 0.03186425194144249, -0.009759020991623402, -0.030846377834677696, 0.037899427115917206, -0.012731331400573254, 0.000188045363756828, 0.03360197693109512, -0.03826137259602547, 0.07449356466531754, 0.001238143420778215, -0.007641963195055723, 0.06467071920633316, -0.018405135720968246, 0.046564701944589615, -0.04106247425079346, -0.021856514737010002, 0.07621057331562042, -0.04304991289973259, -0.04306909069418907, -0.0304432176053524, 0.01527604553848505, -0.005707693751901388, 0.038819730281829834, 0.012942232191562653, 0.01949952356517315, -0.021732443943619728, 0.03682343289256096, 0.061600372195243835, -0.020474111661314964, -0.012682368978857994, 0.04342878982424736, 0.04062390327453613, 0.011738210916519165, 0.023332543671131134, 0.005282609257847071, -0.01856761984527111, 0.0955662876367569], @metadata={:source=>\"ruby.txt\"}, @document=\"true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns a new Array whose elements are the return values from the block:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map {|element| element.class }\\na1 # => [Symbol, String, Integer]\\nReturns a new Enumerator if no block given:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map\\na1 # => #<Enumerator: [:foo, \\\"bar\\\", 2]:map>\\nArray#collect is an alias for Array#map.\\n\\nAlias for: collect\\nmap! {|element| ... } → self\\nmap! → new_enumerator\\nCalls the block, if given, with each element; replaces the element with the block’s return value:\\n\\na = [:foo, 'bar', 2]\\na.map! { |element| element.class } # => [Symbol, String, Integer]\\nReturns a new Enumerator if no block given:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map!\\na1 # => #<Enumerator: [:foo, \\\"bar\\\", 2]:map!>\\nArray#collect! is an alias for Array#map!.\\n\\nAlias for: collect!\\n\", @distance=nil>]"
|
627
|
+
]
|
628
|
+
},
|
629
|
+
"metadata": {},
|
630
|
+
"output_type": "display_data"
|
631
|
+
}
|
632
|
+
],
|
633
|
+
"source": [
|
634
|
+
"embeddings = texts.map.with_index do |text, index|\n",
|
635
|
+
" Chroma::Resources::Embedding.new(id: SecureRandom.uuid, embedding: documents_embeddings[index], metadata: metadatas[index], document: text.content) \n",
|
636
|
+
"end\n",
|
637
|
+
"\n",
|
638
|
+
"IRuby.display embeddings"
|
639
|
+
]
|
640
|
+
},
|
641
|
+
{
|
642
|
+
"cell_type": "code",
|
643
|
+
"execution_count": 17,
|
644
|
+
"id": "8556fdf6",
|
645
|
+
"metadata": {},
|
646
|
+
"outputs": [
|
647
|
+
{
|
648
|
+
"data": {
|
649
|
+
"text/plain": [
|
650
|
+
"true"
|
651
|
+
]
|
652
|
+
},
|
653
|
+
"execution_count": 17,
|
654
|
+
"metadata": {},
|
655
|
+
"output_type": "execute_result"
|
656
|
+
}
|
657
|
+
],
|
658
|
+
"source": [
|
659
|
+
"collection.add(embeddings)"
|
660
|
+
]
|
661
|
+
},
|
662
|
+
{
|
663
|
+
"cell_type": "code",
|
664
|
+
"execution_count": 18,
|
665
|
+
"id": "b045d12d",
|
666
|
+
"metadata": {},
|
667
|
+
"outputs": [
|
668
|
+
{
|
669
|
+
"data": {
|
670
|
+
"text/plain": [
|
671
|
+
"2"
|
672
|
+
]
|
673
|
+
},
|
674
|
+
"metadata": {},
|
675
|
+
"output_type": "display_data"
|
676
|
+
}
|
677
|
+
],
|
678
|
+
"source": [
|
679
|
+
"IRuby.display collection.count"
|
680
|
+
]
|
681
|
+
},
|
682
|
+
{
|
683
|
+
"cell_type": "markdown",
|
684
|
+
"id": "8a641ecd",
|
685
|
+
"metadata": {},
|
686
|
+
"source": [
|
687
|
+
"### Vector Store naive implementation"
|
688
|
+
]
|
689
|
+
},
|
690
|
+
{
|
691
|
+
"cell_type": "code",
|
692
|
+
"execution_count": 19,
|
693
|
+
"id": "da118706",
|
694
|
+
"metadata": {},
|
695
|
+
"outputs": [
|
696
|
+
{
|
697
|
+
"data": {
|
698
|
+
"text/plain": [
|
699
|
+
":similarity_search"
|
700
|
+
]
|
701
|
+
},
|
702
|
+
"execution_count": 19,
|
703
|
+
"metadata": {},
|
704
|
+
"output_type": "execute_result"
|
705
|
+
}
|
706
|
+
],
|
707
|
+
"source": [
|
708
|
+
"class VectorStore\n",
|
709
|
+
" def initialize(store, search_type = \"similarity\")\n",
|
710
|
+
" @store = store\n",
|
711
|
+
" @search_type\n",
|
712
|
+
" end\n",
|
713
|
+
" \n",
|
714
|
+
" def relevant_documents(query)\n",
|
715
|
+
" if @search_type == \"similarity\"\n",
|
716
|
+
" @store.similarity_search(query)\n",
|
717
|
+
" end\n",
|
718
|
+
" end\n",
|
719
|
+
" \n",
|
720
|
+
" protected\n",
|
721
|
+
" \n",
|
722
|
+
" def text_to_embeddings(query)\n",
|
723
|
+
" command = <<~PYTHON\n",
|
724
|
+
"python - << EOF\n",
|
725
|
+
"import json\n",
|
726
|
+
"from InstructorEmbedding import INSTRUCTOR\n",
|
727
|
+
"from langchain.embeddings import HuggingFaceInstructEmbeddings\n",
|
728
|
+
"\n",
|
729
|
+
"from langchain.embeddings import HuggingFaceInstructEmbeddings\n",
|
730
|
+
"instructor_embeddings = HuggingFaceInstructEmbeddings(model_name=\"hkunlp/instructor-xl\", \n",
|
731
|
+
" model_kwargs={\"device\": \"cpu\"})\n",
|
732
|
+
" \n",
|
733
|
+
"embeddings = instructor_embeddings.embed_documents(list([\"#{query}\"]))\n",
|
734
|
+
"\n",
|
735
|
+
"print(\"========\")\n",
|
736
|
+
"print(embeddings)\n",
|
737
|
+
"EOF\n",
|
738
|
+
"PYTHON\n",
|
739
|
+
"\n",
|
740
|
+
" stdout_data, stderr_data, exit_code = run_system(command)\n",
|
741
|
+
" \n",
|
742
|
+
" parse_output(stdout_data)\n",
|
743
|
+
" end\n",
|
744
|
+
" \n",
|
745
|
+
" private\n",
|
746
|
+
"\n",
|
747
|
+
" def parse_output(data)\n",
|
748
|
+
" delimiter = \"========\"\n",
|
749
|
+
" parsed_text = data.split(delimiter).last.strip\n",
|
750
|
+
" \n",
|
751
|
+
" return nil if parsed_text.nil?\n",
|
752
|
+
" JSON.parse(parsed_text)\n",
|
753
|
+
" end\n",
|
754
|
+
" \n",
|
755
|
+
" def run_system(command)\n",
|
756
|
+
" stdin, stdout, stderr, wait_thr = Open3.popen3(command)\n",
|
757
|
+
" stdout_data = stdout.gets(nil)\n",
|
758
|
+
" stdout.close\n",
|
759
|
+
" stderr_data = stderr.gets(nil)\n",
|
760
|
+
" stderr.close\n",
|
761
|
+
" exit_code = wait_thr.value\n",
|
762
|
+
" \n",
|
763
|
+
" [stdout_data, stderr_data, exit_code]\n",
|
764
|
+
" end\n",
|
765
|
+
"end\n",
|
766
|
+
"\n",
|
767
|
+
"class ChromaVectorStore < VectorStore\n",
|
768
|
+
" def similarity_search(query, k: 4, filter: {})\n",
|
769
|
+
" query_embeddings = text_to_embeddings(query)\n",
|
770
|
+
" \n",
|
771
|
+
" @store.query(query_embeddings:, results: k, where: filter)\n",
|
772
|
+
" end\n",
|
773
|
+
"end"
|
774
|
+
]
|
775
|
+
},
|
776
|
+
{
|
777
|
+
"cell_type": "markdown",
|
778
|
+
"id": "39419dac",
|
779
|
+
"metadata": {},
|
780
|
+
"source": [
|
781
|
+
"### Search for similiarity"
|
782
|
+
]
|
783
|
+
},
|
784
|
+
{
|
785
|
+
"cell_type": "code",
|
786
|
+
"execution_count": 20,
|
787
|
+
"id": "e1a46ead",
|
788
|
+
"metadata": {},
|
789
|
+
"outputs": [
|
790
|
+
{
|
791
|
+
"data": {
|
792
|
+
"text/plain": [
|
793
|
+
"[#<Chroma::Resources::Embedding:0x00000001046ac048 @id=\"92118736-5484-4da4-b669-cde1cc03c369\", @embedding=nil, @metadata={\"source\"=>\"ruby.txt\"}, @document=\"Array#any?\\n\\nany? → true or false\\nany? {|element| ... } → true or false\\nany?(obj) → true or false\\n\\nReturns true if any element of self meets a given criterion.\\n\\nWith no block given and no argument, returns true if self has any truthy element, false otherwise:\\n\\n[nil, 0, false].any? # => true\\n[nil, false].any? # => false\\n[].any? # => false\\nWith a block given and no argument, calls the block with each element in self; returns true if the block returns any truthy value, false otherwise:\\n\\n[0, 1, 2].any? {|element| element > 1 } # => true\\n[0, 1, 2].any? {|element| element > 2 } # => false\\nIf argument obj is given, returns true if obj.=== any element, false otherwise:\\n\\n['food', 'drink'].any?(/foo/) # => true\\n['food', 'drink'].any?(/bar/) # => false\\n[].any?(/foo/) # => false\\n[0, 1, 2].any?(1) # => true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns\", @distance=0.48262059688568115>, #<Chroma::Resources::Embedding:0x00000001046abff8 @id=\"8a63e0b7-9211-443a-b7f7-9ae387348749\", @embedding=nil, @metadata={\"source\"=>\"ruby.txt\"}, @document=\"true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns a new Array whose elements are the return values from the block:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map {|element| element.class }\\na1 # => [Symbol, String, Integer]\\nReturns a new Enumerator if no block given:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map\\na1 # => #<Enumerator: [:foo, \\\"bar\\\", 2]:map>\\nArray#collect is an alias for Array#map.\\n\\nAlias for: collect\\nmap! {|element| ... } → self\\nmap! → new_enumerator\\nCalls the block, if given, with each element; replaces the element with the block’s return value:\\n\\na = [:foo, 'bar', 2]\\na.map! { |element| element.class } # => [Symbol, String, Integer]\\nReturns a new Enumerator if no block given:\\n\\na = [:foo, 'bar', 2]\\na1 = a.map!\\na1 # => #<Enumerator: [:foo, \\\"bar\\\", 2]:map!>\\nArray#collect! is an alias for Array#map!.\\n\\nAlias for: collect!\\n\", @distance=0.5023157596588135>]"
|
794
|
+
]
|
795
|
+
},
|
796
|
+
"execution_count": 20,
|
797
|
+
"metadata": {},
|
798
|
+
"output_type": "execute_result"
|
799
|
+
}
|
800
|
+
],
|
801
|
+
"source": [
|
802
|
+
"vs = ChromaVectorStore.new(collection)\n",
|
803
|
+
"embeddings = vs.similarity_search(\"array any?\", k: 2)"
|
804
|
+
]
|
805
|
+
},
|
806
|
+
{
|
807
|
+
"cell_type": "code",
|
808
|
+
"execution_count": 21,
|
809
|
+
"id": "ab282709",
|
810
|
+
"metadata": {},
|
811
|
+
"outputs": [
|
812
|
+
{
|
813
|
+
"data": {
|
814
|
+
"text/plain": [
|
815
|
+
"#<Chroma::Resources::Embedding:0x00000001046ac048 @id=\"92118736-5484-4da4-b669-cde1cc03c369\", @embedding=nil, @metadata={\"source\"=>\"ruby.txt\"}, @document=\"Array#any?\\n\\nany? → true or false\\nany? {|element| ... } → true or false\\nany?(obj) → true or false\\n\\nReturns true if any element of self meets a given criterion.\\n\\nWith no block given and no argument, returns true if self has any truthy element, false otherwise:\\n\\n[nil, 0, false].any? # => true\\n[nil, false].any? # => false\\n[].any? # => false\\nWith a block given and no argument, calls the block with each element in self; returns true if the block returns any truthy value, false otherwise:\\n\\n[0, 1, 2].any? {|element| element > 1 } # => true\\n[0, 1, 2].any? {|element| element > 2 } # => false\\nIf argument obj is given, returns true if obj.=== any element, false otherwise:\\n\\n['food', 'drink'].any?(/foo/) # => true\\n['food', 'drink'].any?(/bar/) # => false\\n[].any?(/foo/) # => false\\n[0, 1, 2].any?(1) # => true\\n[0, 1, 2].any?(3) # => false\\nRelated: Enumerable#any?\\n\\n\\nArray#map Array#map!\\n\\nmap {|element| ... } → new_array\\nmap → new_enumerator\\nCalls the block, if given, with each element of self; returns\", @distance=0.48262059688568115>"
|
816
|
+
]
|
817
|
+
},
|
818
|
+
"execution_count": 21,
|
819
|
+
"metadata": {},
|
820
|
+
"output_type": "execute_result"
|
821
|
+
}
|
822
|
+
],
|
823
|
+
"source": [
|
824
|
+
"embeddings[0]"
|
825
|
+
]
|
826
|
+
},
|
827
|
+
{
|
828
|
+
"cell_type": "code",
|
829
|
+
"execution_count": null,
|
830
|
+
"id": "4ef9404d",
|
831
|
+
"metadata": {},
|
832
|
+
"outputs": [],
|
833
|
+
"source": []
|
834
|
+
}
|
835
|
+
],
|
836
|
+
"metadata": {
|
837
|
+
"kernelspec": {
|
838
|
+
"display_name": "Ruby 3.2.2",
|
839
|
+
"language": "ruby",
|
840
|
+
"name": "ruby"
|
841
|
+
},
|
842
|
+
"language_info": {
|
843
|
+
"file_extension": ".rb",
|
844
|
+
"mimetype": "application/x-ruby",
|
845
|
+
"name": "ruby",
|
846
|
+
"version": "3.2.2"
|
847
|
+
}
|
848
|
+
},
|
849
|
+
"nbformat": 4,
|
850
|
+
"nbformat_minor": 5
|
851
|
+
}
|