stretchy-model 0.6.0 → 0.6.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (191) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -1
  3. data/README.md +28 -10
  4. data/Rakefile +56 -0
  5. data/containers/Dockerfile.opensearch +4 -3
  6. data/docker-compose.yml +32 -19
  7. data/docs/.nojekyll +0 -0
  8. data/docs/README.md +147 -0
  9. data/docs/_coverpage.md +14 -0
  10. data/docs/_sidebar.md +14 -0
  11. data/docs/examples/_sidebar.md +15 -0
  12. data/docs/examples/data_analysis.md +216 -0
  13. data/docs/examples/semantic_search_with_llm.md +83 -0
  14. data/docs/examples/simple-ingest-pipeline.md +326 -0
  15. data/docs/guides/_sidebar.md +14 -0
  16. data/docs/guides/aggregations.md +142 -0
  17. data/docs/guides/machine-learning.md +154 -0
  18. data/docs/guides/models.md +372 -0
  19. data/docs/guides/pipelines.md +151 -0
  20. data/docs/guides/querying.md +361 -0
  21. data/docs/guides/quick-start.md +72 -0
  22. data/docs/guides/scopes.md +125 -0
  23. data/docs/index.html +113 -0
  24. data/docs/stretchy.cover.png +0 -0
  25. data/docs/stretchy.logo.png +0 -0
  26. data/docs/styles.css +90 -0
  27. data/lib/elasticsearch/api/actions/machine_learning/models/delete_model.rb +33 -0
  28. data/lib/elasticsearch/api/actions/machine_learning/models/deploy.rb +31 -0
  29. data/lib/elasticsearch/api/actions/machine_learning/models/get_model.rb +43 -0
  30. data/lib/elasticsearch/api/actions/machine_learning/models/get_status.rb +31 -0
  31. data/lib/elasticsearch/api/actions/machine_learning/models/params_registry.rb +45 -0
  32. data/lib/elasticsearch/api/actions/machine_learning/models/register.rb +45 -0
  33. data/lib/elasticsearch/api/actions/machine_learning/models/undeploy.rb +32 -0
  34. data/lib/elasticsearch/api/actions/machine_learning/models/update_model.rb +39 -0
  35. data/lib/elasticsearch/api/namespace/machine_learning/model.rb +27 -0
  36. data/lib/opensearch/api/actions/machine_learning/models/delete_model.rb +33 -0
  37. data/lib/opensearch/api/actions/machine_learning/models/deploy.rb +31 -0
  38. data/lib/opensearch/api/actions/machine_learning/models/get_model.rb +44 -0
  39. data/lib/opensearch/api/actions/machine_learning/models/get_status.rb +31 -0
  40. data/lib/opensearch/api/actions/machine_learning/models/params_registry.rb +45 -0
  41. data/lib/opensearch/api/actions/machine_learning/models/register.rb +45 -0
  42. data/lib/opensearch/api/actions/machine_learning/models/undeploy.rb +31 -0
  43. data/lib/opensearch/api/actions/machine_learning/models/update_model.rb +39 -0
  44. data/lib/opensearch/api/namespace/machine_learning/model.rb +27 -0
  45. data/lib/stretchy/attributes/transformers/keyword_transformer.rb +41 -35
  46. data/lib/stretchy/attributes/type/array.rb +24 -1
  47. data/lib/stretchy/attributes/type/base.rb +6 -2
  48. data/lib/stretchy/attributes/type/binary.rb +24 -17
  49. data/lib/stretchy/attributes/type/boolean.rb +29 -22
  50. data/lib/stretchy/attributes/type/completion.rb +18 -10
  51. data/lib/stretchy/attributes/type/constant_keyword.rb +35 -26
  52. data/lib/stretchy/attributes/type/date_time.rb +81 -20
  53. data/lib/stretchy/attributes/type/dense_vector.rb +46 -49
  54. data/lib/stretchy/attributes/type/flattened.rb +28 -19
  55. data/lib/stretchy/attributes/type/geo_point.rb +21 -12
  56. data/lib/stretchy/attributes/type/geo_shape.rb +21 -12
  57. data/lib/stretchy/attributes/type/hash.rb +24 -10
  58. data/lib/stretchy/attributes/type/histogram.rb +25 -0
  59. data/lib/stretchy/attributes/type/ip.rb +26 -17
  60. data/lib/stretchy/attributes/type/join.rb +16 -7
  61. data/lib/stretchy/attributes/type/keyword.rb +21 -26
  62. data/lib/stretchy/attributes/type/knn_vector.rb +47 -0
  63. data/lib/stretchy/attributes/type/match_only_text.rb +22 -1
  64. data/lib/stretchy/attributes/type/nested.rb +16 -11
  65. data/lib/stretchy/attributes/type/numeric/base.rb +30 -22
  66. data/lib/stretchy/attributes/type/numeric/byte.rb +20 -0
  67. data/lib/stretchy/attributes/type/numeric/double.rb +20 -0
  68. data/lib/stretchy/attributes/type/numeric/float.rb +20 -0
  69. data/lib/stretchy/attributes/type/numeric/half_float.rb +20 -0
  70. data/lib/stretchy/attributes/type/numeric/integer.rb +21 -1
  71. data/lib/stretchy/attributes/type/numeric/long.rb +20 -0
  72. data/lib/stretchy/attributes/type/numeric/scaled_float.rb +16 -7
  73. data/lib/stretchy/attributes/type/numeric/short.rb +20 -0
  74. data/lib/stretchy/attributes/type/numeric/unsigned_long.rb +21 -1
  75. data/lib/stretchy/attributes/type/percolator.rb +16 -4
  76. data/lib/stretchy/attributes/type/point.rb +19 -9
  77. data/lib/stretchy/attributes/type/range/base.rb +24 -1
  78. data/lib/stretchy/attributes/type/range/date_range.rb +21 -5
  79. data/lib/stretchy/attributes/type/range/double_range.rb +20 -4
  80. data/lib/stretchy/attributes/type/range/float_range.rb +21 -5
  81. data/lib/stretchy/attributes/type/range/integer_range.rb +20 -4
  82. data/lib/stretchy/attributes/type/range/ip_range.rb +20 -4
  83. data/lib/stretchy/attributes/type/range/long_range.rb +20 -4
  84. data/lib/stretchy/attributes/type/rank_feature.rb +16 -6
  85. data/lib/stretchy/attributes/type/rank_features.rb +27 -10
  86. data/lib/stretchy/attributes/type/search_as_you_type.rb +28 -18
  87. data/lib/stretchy/attributes/type/shape.rb +19 -9
  88. data/lib/stretchy/attributes/type/sparse_vector.rb +25 -21
  89. data/lib/stretchy/attributes/type/string.rb +42 -1
  90. data/lib/stretchy/attributes/type/text.rb +53 -28
  91. data/lib/stretchy/attributes/type/token_count.rb +21 -11
  92. data/lib/stretchy/attributes/type/version.rb +16 -6
  93. data/lib/stretchy/attributes/type/wildcard.rb +36 -25
  94. data/lib/stretchy/attributes.rb +30 -0
  95. data/lib/stretchy/delegation/gateway_delegation.rb +86 -2
  96. data/lib/stretchy/index_setting.rb +94 -0
  97. data/lib/stretchy/indexing/bulk.rb +75 -3
  98. data/lib/stretchy/machine_learning/model.rb +192 -0
  99. data/lib/stretchy/model/callbacks.rb +1 -0
  100. data/lib/stretchy/model/common.rb +157 -0
  101. data/lib/stretchy/model/persistence.rb +144 -0
  102. data/lib/stretchy/model/refreshable.rb +26 -0
  103. data/lib/stretchy/open_search_compatibility.rb +4 -0
  104. data/lib/stretchy/pipeline.rb +124 -0
  105. data/lib/stretchy/pipelines/processor.rb +57 -0
  106. data/lib/stretchy/querying.rb +7 -7
  107. data/lib/stretchy/rails/instrumentation/publishers.rb +31 -0
  108. data/lib/{rails → stretchy/rails}/instrumentation/railtie.rb +11 -6
  109. data/lib/stretchy/record.rb +5 -4
  110. data/lib/stretchy/relation.rb +230 -28
  111. data/lib/stretchy/relations/aggregation_methods/aggregation.rb +59 -0
  112. data/lib/stretchy/relations/aggregation_methods/avg.rb +45 -0
  113. data/lib/stretchy/relations/aggregation_methods/bucket_script.rb +47 -0
  114. data/lib/stretchy/relations/aggregation_methods/bucket_selector.rb +47 -0
  115. data/lib/stretchy/relations/aggregation_methods/bucket_sort.rb +47 -0
  116. data/lib/stretchy/relations/aggregation_methods/cardinality.rb +47 -0
  117. data/lib/stretchy/relations/aggregation_methods/children.rb +47 -0
  118. data/lib/stretchy/relations/aggregation_methods/composite.rb +41 -0
  119. data/lib/stretchy/relations/aggregation_methods/date_histogram.rb +53 -0
  120. data/lib/stretchy/relations/aggregation_methods/date_range.rb +53 -0
  121. data/lib/stretchy/relations/aggregation_methods/extended_stats.rb +48 -0
  122. data/lib/stretchy/relations/aggregation_methods/filter.rb +47 -0
  123. data/lib/stretchy/relations/aggregation_methods/filters.rb +47 -0
  124. data/lib/stretchy/relations/aggregation_methods/geo_bounds.rb +40 -0
  125. data/lib/stretchy/relations/aggregation_methods/geo_centroid.rb +40 -0
  126. data/lib/stretchy/relations/aggregation_methods/global.rb +39 -0
  127. data/lib/stretchy/relations/aggregation_methods/histogram.rb +43 -0
  128. data/lib/stretchy/relations/aggregation_methods/ip_range.rb +41 -0
  129. data/lib/stretchy/relations/aggregation_methods/max.rb +40 -0
  130. data/lib/stretchy/relations/aggregation_methods/min.rb +41 -0
  131. data/lib/stretchy/relations/aggregation_methods/missing.rb +40 -0
  132. data/lib/stretchy/relations/aggregation_methods/nested.rb +40 -0
  133. data/lib/stretchy/relations/aggregation_methods/percentile_ranks.rb +45 -0
  134. data/lib/stretchy/relations/aggregation_methods/percentiles.rb +45 -0
  135. data/lib/stretchy/relations/aggregation_methods/range.rb +42 -0
  136. data/lib/stretchy/relations/aggregation_methods/reverse_nested.rb +40 -0
  137. data/lib/stretchy/relations/aggregation_methods/sampler.rb +40 -0
  138. data/lib/stretchy/relations/aggregation_methods/scripted_metric.rb +43 -0
  139. data/lib/stretchy/relations/aggregation_methods/significant_terms.rb +45 -0
  140. data/lib/stretchy/relations/aggregation_methods/stats.rb +42 -0
  141. data/lib/stretchy/relations/aggregation_methods/sum.rb +42 -0
  142. data/lib/stretchy/relations/aggregation_methods/terms.rb +46 -0
  143. data/lib/stretchy/relations/aggregation_methods/top_hits.rb +42 -0
  144. data/lib/stretchy/relations/aggregation_methods/top_metrics.rb +44 -0
  145. data/lib/stretchy/relations/aggregation_methods/value_count.rb +41 -0
  146. data/lib/stretchy/relations/aggregation_methods/weighted_avg.rb +42 -0
  147. data/lib/stretchy/relations/aggregation_methods.rb +20 -749
  148. data/lib/stretchy/relations/finder_methods.rb +2 -18
  149. data/lib/stretchy/relations/null_relation.rb +55 -0
  150. data/lib/stretchy/relations/query_builder.rb +139 -23
  151. data/lib/stretchy/relations/query_methods/bind.rb +19 -0
  152. data/lib/stretchy/relations/query_methods/extending.rb +29 -0
  153. data/lib/stretchy/relations/query_methods/fields.rb +70 -0
  154. data/lib/stretchy/relations/query_methods/filter_query.rb +53 -0
  155. data/lib/stretchy/relations/query_methods/has_field.rb +40 -0
  156. data/lib/stretchy/relations/query_methods/highlight.rb +75 -0
  157. data/lib/stretchy/relations/query_methods/hybrid.rb +60 -0
  158. data/lib/stretchy/relations/query_methods/ids.rb +40 -0
  159. data/lib/stretchy/relations/query_methods/match.rb +52 -0
  160. data/lib/stretchy/relations/query_methods/must_not.rb +54 -0
  161. data/lib/stretchy/relations/query_methods/neural.rb +58 -0
  162. data/lib/stretchy/relations/query_methods/neural_sparse.rb +43 -0
  163. data/lib/stretchy/relations/query_methods/none.rb +21 -0
  164. data/lib/stretchy/relations/query_methods/or_filter.rb +21 -0
  165. data/lib/stretchy/relations/query_methods/order.rb +63 -0
  166. data/lib/stretchy/relations/query_methods/query_string.rb +44 -0
  167. data/lib/stretchy/relations/query_methods/regexp.rb +61 -0
  168. data/lib/stretchy/relations/query_methods/should.rb +51 -0
  169. data/lib/stretchy/relations/query_methods/size.rb +44 -0
  170. data/lib/stretchy/relations/query_methods/skip_callbacks.rb +47 -0
  171. data/lib/stretchy/relations/query_methods/source.rb +59 -0
  172. data/lib/stretchy/relations/query_methods/where.rb +113 -0
  173. data/lib/stretchy/relations/query_methods.rb +51 -540
  174. data/lib/stretchy/relations/scoping/default.rb +136 -0
  175. data/lib/stretchy/relations/scoping/named.rb +70 -0
  176. data/lib/stretchy/relations/scoping/scope_registry.rb +36 -0
  177. data/lib/stretchy/relations/scoping.rb +30 -0
  178. data/lib/stretchy/relations/search_option_methods.rb +2 -0
  179. data/lib/stretchy/shared_scopes.rb +6 -1
  180. data/lib/stretchy/version.rb +1 -1
  181. data/lib/stretchy.rb +23 -11
  182. metadata +147 -18
  183. data/lib/rails/instrumentation/publishers.rb +0 -29
  184. data/lib/stretchy/common.rb +0 -33
  185. data/lib/stretchy/null_relation.rb +0 -53
  186. data/lib/stretchy/persistence.rb +0 -43
  187. data/lib/stretchy/refreshable.rb +0 -15
  188. data/lib/stretchy/scoping/default.rb +0 -134
  189. data/lib/stretchy/scoping/named.rb +0 -68
  190. data/lib/stretchy/scoping/scope_registry.rb +0 -34
  191. data/lib/stretchy/scoping.rb +0 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f23580fe0c8761ced02da75cbc44f16ab7893a559d30d9a5e0de23aa96ed790a
4
- data.tar.gz: 821381d3e9e92822d71aac637d98f8b04a76736a80e421ca7d2c4fd05fd108d8
3
+ metadata.gz: ff8d7515eb6f795cf92989ba14d3dd404bb716378896ca2da4054bca98e467f4
4
+ data.tar.gz: 2361d39fcacb927f7d62bfe2105054c0923cd6b20af3cd7724c2bc9d784d42e5
5
5
  SHA512:
6
- metadata.gz: 99dc36ee6cce021e869a8e1f3e2f005f546b1c50bf973434c624574c1dd601757baf4a1d39562f7ddd05bf4a1e640512111b821d8a70b8d5416bf73701b38f03
7
- data.tar.gz: 4a58307bd7ae5a5781e7462df9c95438f66eecab5d0ae43238878d228e2ca6939a8a7643b70ac3c2586dcae537935206bfb15339415f6b3c3a70ef2518e401b4
6
+ metadata.gz: 410eb69810533c4a12fe12b1bdd5ea6df9b320f9c1953a32f924487b02f1825296765e5a5451abf1c22491a6d9e8a1741e4e7ef01789acbac0026e1db423602d
7
+ data.tar.gz: 9afdb5b058a307c3b64f8043f1e4f2348973050b6b5c802726031f060a44ac11003633ba816ab278ba204c1a4946a12f0c0c7171fd72cf0188493c43b3e0d4df
data/.yardopts CHANGED
@@ -1 +1,2 @@
1
- --markup markdown
1
+ # --markup markdown
2
+ # --format=markdown
data/README.md CHANGED
@@ -1,34 +1,43 @@
1
1
  stretchy-model
2
2
  ===
3
3
  <p>
4
- <a href="https://stretchy.io/" target="_blank"><img src="./stretchy.logo.png" alt="Gum Image" width="450" /></a>
4
+ <a href="https://stretchy.io/" target="_blank"><img src="./stretchy.logo.png" alt="Stretchy Image" width="450" /></a>
5
5
  <br><br>
6
6
  <a href="https://github.com/theablefew/stretchy/releases"><img src="https://img.shields.io/github/v/release/theablefew/stretchy?sort=semver&color=blue"></a>
7
7
  <a href="https://github.com/theablefew/stretchy/actions"><img src="https://github.com/theablefew/stretchy/actions/workflows/spec.yml/badge.svg"></a>
8
8
 
9
9
  </p>
10
10
 
11
+ Stretchy provides Elasticsearch/Opensearch models in Rails applications with a Rails-like model interface.
11
12
 
12
13
  ## Features
13
14
  Stretchy simplifies the process of querying, aggregating, and managing Elasticsearch-backed models, allowing Rails developers to work with search indices as comfortably as they would with traditional Rails models.
14
15
 
15
- * Model fully back by Elasticsearch/Opensearch
16
- * Chain queries, scopes and aggregations
16
+ * Models fully backed by Elasticsearch/Opensearch
17
+ * Chain `queries`, `scopes` and `aggregations`
17
18
  * Reduce Elasticsearch query complexity
18
19
  * Support for time-based indices and aliases
19
- * Associations to both ActiveRecord models and Stretchy::Record
20
+ * Associations to both ActiveRecord models and `StretchyModel`
20
21
  * Bulk Operations made easy
22
+ * Ingest and Search Pipelines
23
+ * Machine Learning
24
+ * Vector and Neural search
25
+ * Integrated RAG and LLM connectors
21
26
  * Validations, custom attributes, and more...
22
27
 
23
28
  Follow the guides to learn more about:
24
-
25
29
  * [Models](https://theablefew.github.io/stretchy/#/guides/models?id=models)
26
30
  * [Querying](https://theablefew.github.io/stretchy/#/guides/querying?id=querying)
27
31
  * [Aggregations](https://theablefew.github.io/stretchy/#/guides/aggregations?id=aggregations)
28
32
  * [Scopes](https://theablefew.github.io/stretchy/#/guides/scopes?id=scopes)
33
+ * [Pipelines](https://theablefew.github.io/stretchy/#/guides/pipelines?id=pipelines)
34
+ * [Machine Learning](https://theablefew.github.io/stretchy/#/guides/machine-learning?id=machine-learning)
29
35
 
36
+ [Read the Documentation](https://theablefew.github.io/stretchy/#/) or follow the examples below:
30
37
 
31
- [Read the Documentation](https://theablefew.github.io/stretchy/#/) or walk through of a simple [Data Analysis](https://theablefew.github.io/stretchy/#/examples/data_analysis?id=data-analysis) example.
38
+ **Examples**
39
+ - [Data Analysis](https://theablefew.github.io/stretchy/#/examples/data_analysis?id=data-analysis) example.
40
+ - [Simple Ingest Pipeline](https://theablefew.github.io/stretchy/#/examples/simple-ingest-pipeline?id=simple-ingest-pipeline)
32
41
 
33
42
 
34
43
 
@@ -45,11 +54,15 @@ If bundler is not being used to manage dependencies, install the gem by executin
45
54
  gem install stretchy-model
46
55
  ```
47
56
 
57
+ >[!TIP]
58
+ > If using OpenSearch make sure to add the gem to your Gemfile.
59
+ >
60
+ > `bundle add opensearch-ruby`
61
+
48
62
  <details>
49
63
  <summary>Rails Configuration</summary>
50
64
 
51
65
 
52
-
53
66
  ```sh
54
67
  rails credentials:edit
55
68
  ```
@@ -59,19 +72,24 @@ rails credentials:edit
59
72
  elasticsearch:
60
73
  url: localhost:9200
61
74
 
62
- # or opensearch
75
+ # or if using opensearch
63
76
  # opensearch:
64
77
  # host: https://localhost:9200
65
78
  # user: admin
66
79
  # password: admin
80
+ # transport_options:
81
+ # ssl:
82
+ # verify: false
67
83
  ```
68
84
 
69
85
  #### Create an initializer
70
86
  <p><sub><em>config/initializers/stretchy.rb</em></sub></p>
71
87
 
72
- ```ruby {file=config/initializers/stretchy.rb}
88
+ ```ruby
73
89
  Stretchy.configure do |config|
74
- config.client = Elasticsearch::Client.new url: Rails.application.credentials.elasticsearch.url, log: true
90
+ config.client = Elasticsearch::Client.new Rails.application.credentials.elasticsearch
91
+ # or if using OpenSearch
92
+ # config.client = OpenSearch::Client.new Rails.application.credentials.opensearch
75
93
  end
76
94
  ```
77
95
  </details>
data/Rakefile CHANGED
@@ -94,3 +94,59 @@ namespace :publish do
94
94
  Rake::Task['publish:release'].invoke('patch')
95
95
  end
96
96
  end
97
+
98
+
99
+ namespace :documentation do
100
+
101
+ desc "Generate documentation"
102
+ task :generate do
103
+ system('rdoc --format=markdown --markup=markdown -o docs/doc --force-output -O')
104
+ # generate sidebar
105
+ Rake::Task['documentation:build_sidebar'].invoke
106
+
107
+ end
108
+
109
+ desc "Build sidebar"
110
+ task :build_sidebar do
111
+ # Get all directories in docs/doc
112
+ directories = Dir.glob('docs/doc/**/*').select { |f| File.directory?(f) }
113
+
114
+ # For each directory
115
+ directories.each do |dir|
116
+ puts "Building sidebar for #{dir}"
117
+ # Open _sidebar.md in write mode
118
+ File.open("#{dir}/_sidebar.md", 'w') do |file|
119
+ # Get all .md files in the directory
120
+ md_files = Dir.glob("#{dir}/*.md")
121
+
122
+ # Add top level header with link to the directory
123
+ file.puts("- [#{File.basename(dir)}](#{dir.gsub('docs/', '')}.md)")
124
+ # For each .md file
125
+ md_files.each do |md_file|
126
+ next if md_file.include?('_sidebar.md')
127
+ # Write a markdown link to the file
128
+ md_file_path = md_file.gsub('docs/', '')
129
+ file_name = File.basename(md_file, '.md')
130
+ file.puts(" - [#{file_name}](#{md_file_path})")
131
+
132
+
133
+ # Check for a directory with the same name as the file
134
+ if Dir.exist?("#{dir}/#{file_name}")
135
+ # Get all .md files in the subdirectory
136
+ sub_md_files = Dir.glob("#{dir}/#{file_name}/*.md")
137
+ # For each .md file in the subdirectory
138
+ # Put the parent directory name as a header
139
+ sub_md_files.each do |sub_md_file|
140
+ # Write a markdown link to the file
141
+ sub_md_file_path = sub_md_file.gsub('docs/', '')
142
+ sub_file_name = File.basename(sub_md_file, '.md')
143
+ next if sub_md_file.include?('_sidebar.md')
144
+ file.puts(" - [#{sub_file_name}](#{sub_md_file_path})")
145
+ end
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
151
+
152
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  # Base image
4
4
  FROM opensearchproject/opensearch:2.12.0
5
-
6
5
  # Environment variables
7
6
  # disables bootstrap checks that are enabled when network.host is set to a non-loopback address
8
7
  ENV discovery.type=single-node
@@ -12,8 +11,10 @@ ENV plugins.security.disabled=true
12
11
  # along with the memlock settings below, disables swapping
13
12
  ENV bootstrap.memory_lock=true
14
13
  # minimum and maximum Java heap size, recommend setting both to 50% of system RAM
15
- ENV OPENSEARCH_JAVA_OPTS="-Xms512m -Xmx512m"
14
+ ENV OPENSEARCH_JAVA_OPTS="-Xms3078m -Xmx3078m"
16
15
  # disables execution of install_demo_configuration.sh bundled with security plugin, which installs demo certificates and security configurations to OpenSearch
17
16
  ENV DISABLE_INSTALL_DEMO_CONFIG=true
18
17
  # disables security plugin entirely in OpenSearch by setting plugins.security.disabled: true in opensearch.yml
19
- ENV DISABLE_SECURITY_PLUGIN=true
18
+ # ENV DISABLE_SECURITY_PLUGIN=true
19
+
20
+
data/docker-compose.yml CHANGED
@@ -10,11 +10,16 @@ services:
10
10
  - "9200:9200"
11
11
  environment:
12
12
  - discovery.type=single-node
13
+ networks:
14
+ - elasticsearch-net
15
+
13
16
 
14
17
  opensearch:
15
- build:
16
- context: .
17
- dockerfile: containers/Dockerfile.opensearch
18
+ image: opensearchproject/opensearch:latest
19
+ container_name: opensearch-node1
20
+ # build:
21
+ # context: .
22
+ # dockerfile: containers/Dockerfile.opensearch
18
23
  ulimits:
19
24
  memlock:
20
25
  soft: -1
@@ -24,29 +29,37 @@ services:
24
29
  hard: 65536
25
30
  environment:
26
31
  - discovery.type=single-node
32
+ - node.name=opensearch-node1
33
+ - plugins.security.disabled=true
34
+ - bootstrap.memory_lock=true
35
+ - "OPENSEARCH_JAVA_OPTS=-Xms3078m -Xmx3078m"
36
+ - OPENSEARCH_INITIAL_ADMIN_PASSWORD=A3s0p3nS3cUr1tY
27
37
  # volumes:
28
38
  # - opensearch-data1:/usr/share/opensearch/data
29
39
  ports:
30
40
  - 9200:9200
31
41
  - 9600:9600 # required for Performance Analyzer
32
- # networks:
33
- # - opensearch-net
42
+ networks:
43
+ - opensearch-net
44
+
45
+ opensearch-dashboards:
46
+ image: opensearch-dashboards-no-security # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes
47
+ container_name: opensearch-dashboards
48
+ ports:
49
+ - 5601:5601 # Map host port 5601 to container port 5601
50
+ expose:
51
+ - "5601" # Expose port 5601 for web access to OpenSearch Dashboards
52
+ environment:
53
+ - OPENSEARCH_HOSTS="http://opensearch-node1:9200"
54
+ - OPENSEARCH_INITIAL_ADMIN_PASSWORD=A3s0p3nS3cUr1tY
55
+ - plugins.security.disabled=true
56
+ networks:
57
+ - opensearch-net
34
58
 
35
- # opensearch-dashboards:
36
- # image: opensearchproject/opensearch-dashboards:1.2.0
37
- # container_name: opensearch-dashboards
38
- # ports:
39
- # - 5601:5601
40
- # expose:
41
- # - "5601"
42
- # environment:
43
- # - 'OPENSEARCH_HOSTS=["http://opensearch-node1:9200"]'
44
- # - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards
45
- # networks:
46
- # - opensearch-net
47
59
 
48
60
  # volumes:
49
61
  # opensearch-data1:
50
62
 
51
- # networks:
52
- # opensearch-net:
63
+ networks:
64
+ opensearch-net:
65
+ elasticsearch-net:
data/docs/.nojekyll ADDED
File without changes
data/docs/README.md ADDED
@@ -0,0 +1,147 @@
1
+ stretchy-model
2
+ ===
3
+ Stretchy provides Elasticsearch/Opensearch models in Rails applications with a Rails-like model interface.
4
+ <p >
5
+ <a href="https://github.com/theablefew/stretchy/releases"><img src="https://img.shields.io/github/v/release/theablefew/stretchy?sort=semver&color=blue"></a>
6
+ <a href="https://github.com/theablefew/stretchy/actions"><img src="https://github.com/theablefew/stretchy/actions/workflows/spec.yml/badge.svg"></a>
7
+
8
+ </p>
9
+
10
+ ## Features
11
+ Stretchy simplifies the process of querying, aggregating, and managing Elasticsearch-backed models, allowing Rails developers to work with search indices as comfortably as they would with traditional Rails models.
12
+
13
+ * Models fully backed by Elasticsearch/Opensearch
14
+ * Chain `queries`, `scopes` and `aggregations`
15
+ * Reduce Elasticsearch query complexity
16
+ * Support for time-based indices and aliases
17
+ * Associations to both ActiveRecord models and `StretchyModel`
18
+ * Bulk Operations made easy
19
+ * Ingest and Search Pipelines
20
+ * Machine Learning
21
+ * Vector and Neural search
22
+ * Integrated RAG and LLM connectors
23
+ * Validations, custom attributes, and more...
24
+
25
+ Follow the guides to learn more about:
26
+ * [Models](guides/models?id=models)
27
+ * [Querying](guides/querying?id=querying)
28
+ * [Aggregations](guides/aggregations?id=aggregations)
29
+ * [Scopes](guides/scopes?id=scopes)
30
+ * [Pipelines](guides/pipelines?id=pipelines)
31
+ * [Machine Learning](guides/machine-learning?id=machine-learning)
32
+
33
+ **Examples**
34
+ - [Data Analysis](examples/data_analysis?id=data-analysis)
35
+ - [Simple Ingest Pipeline](examples/simple-ingest-pipeline?id=simple-ingest-pipeline)
36
+
37
+
38
+ ## Installation
39
+
40
+ Install the gem and add to the application's Gemfile by executing:
41
+
42
+ ```sh
43
+ bundle add stretchy-model
44
+ ```
45
+
46
+ If bundler is not being used to manage dependencies, install the gem by executing:
47
+
48
+ ```sh
49
+ gem install stretchy-model
50
+ ```
51
+
52
+ >[!INFO|style:flat]
53
+ > If using OpenSearch add the gem to your Gemfile:
54
+ >
55
+ > ```
56
+ > bundle add opensearch-ruby
57
+ > ```
58
+
59
+ #### Add credentials
60
+
61
+ ```sh
62
+ rails credentials:edit
63
+ ```
64
+
65
+ ```yaml
66
+ elasticsearch:
67
+ url: localhost:9200
68
+
69
+ # or if using opensearch
70
+ # opensearch:
71
+ # host: http://localhost:9200
72
+ # user: admin
73
+ # password: admin
74
+ # transport_options:
75
+ # ssl:
76
+ # verify: false
77
+ ```
78
+
79
+ #### Create an initializer
80
+ *config/initializers/stretchy.rb*
81
+
82
+ ```ruby
83
+ Stretchy.configure do |config|
84
+ config.client = Elasticsearch::Client.new Rails.application.credentials.elasticsearch
85
+ # or if using OpenSearch
86
+ # config.client = OpenSearch::Client.new Rails.application.credentials.opensearch
87
+ end
88
+ ```
89
+
90
+
91
+
92
+ ## Development
93
+
94
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
95
+
96
+
97
+ ## Testing
98
+ <details>
99
+ <summary>Act</summary>
100
+
101
+ Run github action workflow locally
102
+
103
+ ```sh
104
+ brew install act --HEAD
105
+ ```
106
+
107
+ ```sh
108
+ act -P ubuntu-latest=ghcr.io/catthehacker/ubuntu:runner-latest
109
+ ```
110
+
111
+ </details>
112
+
113
+ <details>
114
+ <summary>Elasticsearch</summary>
115
+
116
+
117
+ ```
118
+ docker-compose up elasticsearch
119
+ ```
120
+
121
+ ```
122
+ bundle exec rspec
123
+ ```
124
+
125
+ </details>
126
+
127
+ <details>
128
+ <summary>Opensearch</summary>
129
+
130
+
131
+ ```
132
+ docker-compose up opensearch
133
+ ```
134
+
135
+ ```
136
+ BACKEND=opensearch bundle exec rspec
137
+ ```
138
+ </details>
139
+
140
+ ## Contributing
141
+
142
+ Bug reports and pull requests are welcome on GitHub at https://github.com/theablefew/stretchy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/theablefew/stretchy/blob/master/CODE_OF_CONDUCT.md).
143
+
144
+ ## License
145
+
146
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
147
+
@@ -0,0 +1,14 @@
1
+
2
+ <img src="./stretchy.cover.png" alt="stretchy" width="650" />
3
+
4
+
5
+ > Elasticsearch Models for Ruby on Rails
6
+
7
+ - Chainable Queries and Aggregations
8
+ - Integrated RAG and LLM Agents
9
+ - Ingest Pipelines
10
+
11
+ [GitHub](https://github.com/theablefew/stretchy)
12
+ [Get Started](#stretchy-model)
13
+
14
+ ![color](#D0B6E1)
data/docs/_sidebar.md ADDED
@@ -0,0 +1,14 @@
1
+ * [__Readme__](/)
2
+
3
+ * __Guides__
4
+ * [Quick Start](guides/quick-start?id=quick-start)
5
+ * [Models](guides/models?id=models)
6
+ * [Querying](guides/querying?id=querying)
7
+ * [Scopes](guides/scopes?id=scopes)
8
+ * [Aggregations](guides/aggregations?id=aggregations)
9
+ * [Pipelines](guides/pipelines?id=pipelines)
10
+ * [Machine Learning](guides/machine-learning?id=machine-learning)
11
+
12
+ * __Examples__
13
+ * [Data Analysis](examples/data_analysis)
14
+ * [Simple Ingest Pipeline](examples/simple-ingest-pipeline)
@@ -0,0 +1,15 @@
1
+ * [__Readme__](/)
2
+
3
+ * __Guides__
4
+ * [Quick Start](guides/quick-start?id=quick-start)
5
+ * [Models](guides/models?id=models)
6
+ * [Querying](guides/querying?id=querying)
7
+ * [Scopes](guides/scopes?id=scopes)
8
+ * [Aggregations](guides/aggregations?id=aggregations)
9
+ * [Pipelines](guides/pipelines?id=pipelines)
10
+ * [Machine Learning](guides/machine-learning?id=machine-learning)
11
+
12
+ * __Examples__
13
+ * [Data Analysis](examples/data_analysis)
14
+ * [Simple Ingest Pipeline](examples/simple-ingest-pipeline?id=simple-ingest-pipeline)
15
+ * [Semantic Search with LLMs](examples/semantic_search_with_llm)
@@ -0,0 +1,216 @@
1
+ # Data Analysis
2
+
3
+ This guide will walk you through setting up a basic Rails application using Stretchy and Elasticsearch to ingest and visualize time series data.
4
+
5
+ ## Prerequisites
6
+
7
+ - Ruby and Rails installed on your machine
8
+ - Elasticsearch installed and running
9
+
10
+
11
+
12
+ __Start Elasticsearch:__
13
+
14
+ ```sh
15
+ docker run -d -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.15.0
16
+ ```
17
+
18
+ ## Step 1: Create a New Rails Application
19
+
20
+ ```sh
21
+ rails new time_series_app
22
+ cd time_series_app
23
+ ```
24
+
25
+ ## Step 2: Add `stretchy-model` to Your Gemfile
26
+ ```sh
27
+ bundle add stretchy-model
28
+ ```
29
+ ## Step 3: Configure Stretchy
30
+ #### Add credentials
31
+ ```sh
32
+ rails credentials:edit
33
+ ```
34
+
35
+ ```yaml
36
+ elasticsearch:
37
+ url: localhost:9200
38
+
39
+ # or opensearch
40
+ # opensearch:
41
+ # host: https://localhost:9200
42
+ # user: admin
43
+ # password: admin
44
+ # transport_options:
45
+ # ssl:
46
+ # verify: false
47
+ ```
48
+
49
+ > __NOTE__ If using Opensearch be sure to run:
50
+ > ```sh
51
+ > bundle add opensearch-ruby
52
+ > ```
53
+
54
+ Create an initializer file config/initializers/stretchy.rb and add the following:
55
+ ```ruby
56
+ Stretchy.configure do |config|
57
+ config.client = Elasticsearch::Client.new Rails.application.credentials.elasticsearch
58
+ # or if using Opensearch
59
+ # config.client = OpenSearch::Client.new Rails.application.credentials.opensearch
60
+ end
61
+ ```
62
+ ## Identify a Data Set
63
+
64
+ Let's use "Historical Plane Crashes Since 1908" for our dataset. The following will download the csv, transform the headers and output a json file.
65
+
66
+ Open a Rails console:
67
+ ```sh
68
+ rails c
69
+ ```
70
+
71
+ ```ruby
72
+ require 'open-uri'
73
+ require 'csv'
74
+
75
+ url = 'https://huggingface.co/datasets/nateraw/airplane-crashes-and-fatalities/resolve/main/Airplane_Crashes_and_Fatalities_Since_1908.csv?download=true'
76
+ download = URI.open(url)
77
+ scrub_headers = lambda { |header| header.parameterize.underscore }
78
+ csv = CSV.parse(download.read, headers: true, header_converters: scrub_headers)
79
+ data = csv.map(&:to_hash)
80
+ data.each { |d| d.merge!({'occurred_on': "#{d.delete('date')} #{d.delete('time')}"}); d.delete('index') }
81
+
82
+ File.open('airplane_crashes.json', 'w') do |f|
83
+ f.write(JSON.pretty_generate(data))
84
+ end
85
+ ```
86
+
87
+ Now, each entry has the following schema:
88
+ ```json
89
+ {
90
+ "location": "Fort Myer, Virginia",
91
+ "operator": "Military - U.S. Army",
92
+ "flight": null,
93
+ "route": "Demonstration",
94
+ "type": "Wright Flyer III",
95
+ "registration": null,
96
+ "cn_in": "1",
97
+ "aboard": "2.0",
98
+ "fatalities": "1.0",
99
+ "ground": "0.0",
100
+ "summary": "During a demonstration flight, a U.S. Army flyer flown by Orville Wright nose-dived into the ground from a height of approximately 75 feet, killing Lt. Thomas E. Selfridge who was a passenger. This was the first recorded airplane fatality in history. One of two propellers separated in flight, tearing loose the wires bracing the rudder and causing the loss of control of the aircraft. Orville Wright suffered broken ribs, pelvis and a leg. Selfridge suffered a crushed skull and died a short time later.",
101
+ "occurred_on": "09/17/1908 17:18"
102
+ }
103
+ ```
104
+
105
+ ## Understanding Performance Benefits
106
+ ## Define an Index Strategy
107
+
108
+ Since we have a date field, we can store this data across multiple indexes. `stretchy-model` makes it easy to facilitate this strategy, which is useful when you have a rolling window of data.
109
+
110
+ ## Determine Routing Strategy
111
+
112
+ Routing determines which shard in the cluster a document will be routed to. For large data sets it's a good idea to choose a routing key that groups data that is commonly queried together. It all depends on your needs and your data distribution. For example purposes we'll route based on the operator.
113
+
114
+ ## Define CrashEvent Model
115
+ Create a file for `CrashEvent` at `app/models/crash_event.rb`
116
+
117
+ ```ruby
118
+ class CrashEvent < StretchyModel
119
+
120
+ attribute :occurred_on, :datetime, model_format: '%m/%d/%Y'
121
+ attribute :location, :keyword
122
+ attribute :operator, :keyword
123
+ attribute :flight, :keyword
124
+ attribute :route, :keyword
125
+ attribute :type, :keyword
126
+ attribute :registration, :keyword
127
+ attribute :cn_in, :keyword
128
+ attribute :aboard, :float
129
+ attribute :fatalities, :float
130
+ attribute :ground, :float
131
+ attribute :summary, :text
132
+
133
+ scope :operator_fatalities, -> { aggregation(:operator_fatalities,
134
+ {
135
+ terms: {
136
+ field: :operator
137
+ },
138
+ aggs: {
139
+ fatalities: {
140
+ sum: {
141
+ field: :fatalities
142
+ }
143
+ }
144
+ }
145
+ }
146
+ )
147
+ }
148
+
149
+ scope :date_histogram, ->(name, options, *aggs) do
150
+ # requires field and calendar_interval or interval
151
+ aggregation(name, {date_histogram: options}.merge(*aggs))
152
+ end
153
+
154
+ end
155
+
156
+ ```
157
+
158
+
159
+ ## Bulk Index Documents
160
+
161
+
162
+ Reload your Rails console and start the bulk index operation:
163
+
164
+ ```ruby
165
+ data = JSON.parse(File.read('airplane_crashes.json'))
166
+ CrashEvent.bulk_in_batches(data, size: 100) do |batch|
167
+ puts "Processing batch: #{batch.length}"
168
+ batch.map! do |event|
169
+ CrashEvent.new(event.symbolize_keys).to_bulk
170
+ end
171
+ end
172
+ ```
173
+
174
+ You should see a response showing that each batch was indexed successfully and we should have entries in Elasticsearch!
175
+
176
+ ```ruby
177
+ CrashEvent.count
178
+ #=> 5268
179
+ ```
180
+
181
+ Let's use our scope to apply the aggregation, and set size to 0 since we're only performing aggregations and don't need any document sources:
182
+ ```ruby
183
+ response = CrashEvent.operator_fatalities.size(0)
184
+ ap response.aggregations.operator_fatalities.buckets
185
+ ```
186
+
187
+ ```ruby
188
+ => [
189
+ {
190
+ "key" => "Aeroflot",
191
+ "doc_count" => 179,
192
+ "fatalities" => {
193
+ "value" => 7156.0
194
+ }
195
+ },
196
+ {
197
+ "key" => "Military - U.S. Air Force",
198
+ "doc_count" => 176,
199
+ "fatalities" => {
200
+ "value" => 3717.0
201
+ }
202
+ },
203
+ {
204
+ "key" => "Air France",
205
+ "doc_count" => 70,
206
+ "fatalities" => {
207
+ "value" => 1734.0
208
+ }
209
+ },
210
+ ...
211
+ ]
212
+ ```
213
+
214
+
215
+ ## Visualization
216
+ _Coming soon.._