stretchy-model 0.6.5 → 0.6.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -1
  3. data/README.md +28 -10
  4. data/Rakefile +56 -0
  5. data/docs/.nojekyll +0 -0
  6. data/docs/README.md +147 -0
  7. data/docs/_coverpage.md +14 -0
  8. data/docs/_sidebar.md +14 -0
  9. data/docs/examples/_sidebar.md +15 -0
  10. data/docs/examples/data_analysis.md +216 -0
  11. data/docs/examples/semantic_search_with_llm.md +83 -0
  12. data/docs/examples/simple-ingest-pipeline.md +326 -0
  13. data/docs/guides/_sidebar.md +14 -0
  14. data/docs/guides/aggregations.md +142 -0
  15. data/docs/guides/machine-learning.md +154 -0
  16. data/docs/guides/models.md +372 -0
  17. data/docs/guides/pipelines.md +151 -0
  18. data/docs/guides/querying.md +361 -0
  19. data/docs/guides/quick-start.md +72 -0
  20. data/docs/guides/scopes.md +125 -0
  21. data/docs/index.html +113 -0
  22. data/docs/stretchy.cover.png +0 -0
  23. data/docs/stretchy.logo.png +0 -0
  24. data/docs/styles.css +90 -0
  25. data/lib/stretchy/attributes/transformers/keyword_transformer.rb +41 -35
  26. data/lib/stretchy/attributes/type/array.rb +24 -1
  27. data/lib/stretchy/attributes/type/base.rb +6 -2
  28. data/lib/stretchy/attributes/type/binary.rb +24 -17
  29. data/lib/stretchy/attributes/type/boolean.rb +29 -22
  30. data/lib/stretchy/attributes/type/completion.rb +18 -10
  31. data/lib/stretchy/attributes/type/constant_keyword.rb +35 -26
  32. data/lib/stretchy/attributes/type/date_time.rb +28 -17
  33. data/lib/stretchy/attributes/type/dense_vector.rb +46 -49
  34. data/lib/stretchy/attributes/type/flattened.rb +28 -19
  35. data/lib/stretchy/attributes/type/geo_point.rb +21 -12
  36. data/lib/stretchy/attributes/type/geo_shape.rb +21 -12
  37. data/lib/stretchy/attributes/type/hash.rb +24 -10
  38. data/lib/stretchy/attributes/type/histogram.rb +25 -0
  39. data/lib/stretchy/attributes/type/ip.rb +26 -17
  40. data/lib/stretchy/attributes/type/join.rb +16 -7
  41. data/lib/stretchy/attributes/type/keyword.rb +21 -26
  42. data/lib/stretchy/attributes/type/knn_vector.rb +47 -0
  43. data/lib/stretchy/attributes/type/match_only_text.rb +22 -1
  44. data/lib/stretchy/attributes/type/nested.rb +16 -11
  45. data/lib/stretchy/attributes/type/numeric/base.rb +30 -22
  46. data/lib/stretchy/attributes/type/numeric/byte.rb +20 -0
  47. data/lib/stretchy/attributes/type/numeric/double.rb +20 -0
  48. data/lib/stretchy/attributes/type/numeric/float.rb +20 -0
  49. data/lib/stretchy/attributes/type/numeric/half_float.rb +20 -0
  50. data/lib/stretchy/attributes/type/numeric/integer.rb +21 -1
  51. data/lib/stretchy/attributes/type/numeric/long.rb +20 -0
  52. data/lib/stretchy/attributes/type/numeric/scaled_float.rb +16 -7
  53. data/lib/stretchy/attributes/type/numeric/short.rb +20 -0
  54. data/lib/stretchy/attributes/type/numeric/unsigned_long.rb +21 -1
  55. data/lib/stretchy/attributes/type/percolator.rb +16 -4
  56. data/lib/stretchy/attributes/type/point.rb +19 -9
  57. data/lib/stretchy/attributes/type/range/base.rb +24 -1
  58. data/lib/stretchy/attributes/type/range/date_range.rb +21 -5
  59. data/lib/stretchy/attributes/type/range/double_range.rb +20 -4
  60. data/lib/stretchy/attributes/type/range/float_range.rb +21 -5
  61. data/lib/stretchy/attributes/type/range/integer_range.rb +20 -4
  62. data/lib/stretchy/attributes/type/range/ip_range.rb +20 -4
  63. data/lib/stretchy/attributes/type/range/long_range.rb +20 -4
  64. data/lib/stretchy/attributes/type/rank_feature.rb +16 -6
  65. data/lib/stretchy/attributes/type/rank_features.rb +16 -9
  66. data/lib/stretchy/attributes/type/search_as_you_type.rb +28 -18
  67. data/lib/stretchy/attributes/type/shape.rb +19 -9
  68. data/lib/stretchy/attributes/type/sparse_vector.rb +25 -21
  69. data/lib/stretchy/attributes/type/string.rb +42 -1
  70. data/lib/stretchy/attributes/type/text.rb +53 -28
  71. data/lib/stretchy/attributes/type/token_count.rb +21 -11
  72. data/lib/stretchy/attributes/type/version.rb +16 -6
  73. data/lib/stretchy/attributes/type/wildcard.rb +36 -25
  74. data/lib/stretchy/attributes.rb +29 -0
  75. data/lib/stretchy/delegation/gateway_delegation.rb +78 -0
  76. data/lib/stretchy/index_setting.rb +94 -0
  77. data/lib/stretchy/indexing/bulk.rb +75 -3
  78. data/lib/stretchy/model/callbacks.rb +1 -0
  79. data/lib/stretchy/model/common.rb +157 -0
  80. data/lib/stretchy/model/persistence.rb +144 -0
  81. data/lib/stretchy/model/refreshable.rb +26 -0
  82. data/lib/stretchy/pipeline.rb +2 -1
  83. data/lib/stretchy/pipelines/processor.rb +38 -36
  84. data/lib/stretchy/querying.rb +7 -8
  85. data/lib/stretchy/record.rb +5 -4
  86. data/lib/stretchy/relation.rb +229 -28
  87. data/lib/stretchy/relations/aggregation_methods/aggregation.rb +59 -0
  88. data/lib/stretchy/relations/aggregation_methods/avg.rb +45 -0
  89. data/lib/stretchy/relations/aggregation_methods/bucket_script.rb +47 -0
  90. data/lib/stretchy/relations/aggregation_methods/bucket_selector.rb +47 -0
  91. data/lib/stretchy/relations/aggregation_methods/bucket_sort.rb +47 -0
  92. data/lib/stretchy/relations/aggregation_methods/cardinality.rb +47 -0
  93. data/lib/stretchy/relations/aggregation_methods/children.rb +47 -0
  94. data/lib/stretchy/relations/aggregation_methods/composite.rb +41 -0
  95. data/lib/stretchy/relations/aggregation_methods/date_histogram.rb +53 -0
  96. data/lib/stretchy/relations/aggregation_methods/date_range.rb +53 -0
  97. data/lib/stretchy/relations/aggregation_methods/extended_stats.rb +48 -0
  98. data/lib/stretchy/relations/aggregation_methods/filter.rb +47 -0
  99. data/lib/stretchy/relations/aggregation_methods/filters.rb +47 -0
  100. data/lib/stretchy/relations/aggregation_methods/geo_bounds.rb +40 -0
  101. data/lib/stretchy/relations/aggregation_methods/geo_centroid.rb +40 -0
  102. data/lib/stretchy/relations/aggregation_methods/global.rb +39 -0
  103. data/lib/stretchy/relations/aggregation_methods/histogram.rb +43 -0
  104. data/lib/stretchy/relations/aggregation_methods/ip_range.rb +41 -0
  105. data/lib/stretchy/relations/aggregation_methods/max.rb +40 -0
  106. data/lib/stretchy/relations/aggregation_methods/min.rb +41 -0
  107. data/lib/stretchy/relations/aggregation_methods/missing.rb +40 -0
  108. data/lib/stretchy/relations/aggregation_methods/nested.rb +40 -0
  109. data/lib/stretchy/relations/aggregation_methods/percentile_ranks.rb +45 -0
  110. data/lib/stretchy/relations/aggregation_methods/percentiles.rb +45 -0
  111. data/lib/stretchy/relations/aggregation_methods/range.rb +42 -0
  112. data/lib/stretchy/relations/aggregation_methods/reverse_nested.rb +40 -0
  113. data/lib/stretchy/relations/aggregation_methods/sampler.rb +40 -0
  114. data/lib/stretchy/relations/aggregation_methods/scripted_metric.rb +43 -0
  115. data/lib/stretchy/relations/aggregation_methods/significant_terms.rb +45 -0
  116. data/lib/stretchy/relations/aggregation_methods/stats.rb +42 -0
  117. data/lib/stretchy/relations/aggregation_methods/sum.rb +42 -0
  118. data/lib/stretchy/relations/aggregation_methods/terms.rb +46 -0
  119. data/lib/stretchy/relations/aggregation_methods/top_hits.rb +42 -0
  120. data/lib/stretchy/relations/aggregation_methods/top_metrics.rb +44 -0
  121. data/lib/stretchy/relations/aggregation_methods/value_count.rb +41 -0
  122. data/lib/stretchy/relations/aggregation_methods/weighted_avg.rb +42 -0
  123. data/lib/stretchy/relations/aggregation_methods.rb +20 -749
  124. data/lib/stretchy/relations/finder_methods.rb +2 -18
  125. data/lib/stretchy/relations/null_relation.rb +55 -0
  126. data/lib/stretchy/relations/query_builder.rb +82 -36
  127. data/lib/stretchy/relations/query_methods/bind.rb +19 -0
  128. data/lib/stretchy/relations/query_methods/extending.rb +29 -0
  129. data/lib/stretchy/relations/query_methods/fields.rb +70 -0
  130. data/lib/stretchy/relations/query_methods/filter_query.rb +53 -0
  131. data/lib/stretchy/relations/query_methods/has_field.rb +40 -0
  132. data/lib/stretchy/relations/query_methods/highlight.rb +75 -0
  133. data/lib/stretchy/relations/query_methods/hybrid.rb +60 -0
  134. data/lib/stretchy/relations/query_methods/ids.rb +40 -0
  135. data/lib/stretchy/relations/query_methods/match.rb +52 -0
  136. data/lib/stretchy/relations/query_methods/must_not.rb +54 -0
  137. data/lib/stretchy/relations/query_methods/neural.rb +58 -0
  138. data/lib/stretchy/relations/query_methods/neural_sparse.rb +43 -0
  139. data/lib/stretchy/relations/query_methods/none.rb +21 -0
  140. data/lib/stretchy/relations/query_methods/or_filter.rb +21 -0
  141. data/lib/stretchy/relations/query_methods/order.rb +63 -0
  142. data/lib/stretchy/relations/query_methods/query_string.rb +44 -0
  143. data/lib/stretchy/relations/query_methods/regexp.rb +61 -0
  144. data/lib/stretchy/relations/query_methods/should.rb +51 -0
  145. data/lib/stretchy/relations/query_methods/size.rb +44 -0
  146. data/lib/stretchy/relations/query_methods/skip_callbacks.rb +47 -0
  147. data/lib/stretchy/relations/query_methods/source.rb +59 -0
  148. data/lib/stretchy/relations/query_methods/where.rb +113 -0
  149. data/lib/stretchy/relations/query_methods.rb +48 -569
  150. data/lib/stretchy/relations/scoping/default.rb +136 -0
  151. data/lib/stretchy/relations/scoping/named.rb +70 -0
  152. data/lib/stretchy/relations/scoping/scope_registry.rb +36 -0
  153. data/lib/stretchy/relations/scoping.rb +30 -0
  154. data/lib/stretchy/relations/search_option_methods.rb +2 -0
  155. data/lib/stretchy/version.rb +1 -1
  156. data/lib/stretchy.rb +17 -10
  157. metadata +111 -17
  158. data/lib/stretchy/common.rb +0 -38
  159. data/lib/stretchy/null_relation.rb +0 -53
  160. data/lib/stretchy/persistence.rb +0 -43
  161. data/lib/stretchy/refreshable.rb +0 -15
  162. data/lib/stretchy/scoping/default.rb +0 -134
  163. data/lib/stretchy/scoping/named.rb +0 -68
  164. data/lib/stretchy/scoping/scope_registry.rb +0 -34
  165. data/lib/stretchy/scoping.rb +0 -28
@@ -1,21 +1,27 @@
1
- #
2
- # attribute :ml, :sparse_vector
3
- #
4
- # {
5
- # "mappings": {
6
- # "properties": {
7
- # "ml.tokens": {
8
- # "type": "sparse_vector"
9
- # }
10
- # }
11
- # }
12
- # }
13
- #
14
-
15
-
16
1
  module Stretchy
17
2
  module Attributes
18
3
  module Type
4
+ # The SparseVector attribute type
5
+ #
6
+ # This class is used to define a sparse_vector attribute for a model. It provides support for the Elasticsearch sparse_vector data type, which can hold sparse vectors of float values.
7
+ #
8
+ # ### Parameters
9
+ #
10
+ # - `type:` `:sparse_vector`.
11
+ # - `options:` The Hash of options for the attribute. This type does not have any specific options.
12
+ #
13
+ # ---
14
+ #
15
+ # ### Examples
16
+ #
17
+ # #### Define a sparse_vector attribute
18
+ #
19
+ # ```ruby
20
+ # class MyModel < StretchyModel
21
+ # attribute :ml, :sparse_vector
22
+ # end
23
+ # ```
24
+ #
19
25
  class SparseVector < Stretchy::Attributes::Type::Base
20
26
 
21
27
  def type
@@ -23,12 +29,10 @@ module Stretchy
23
29
  end
24
30
 
25
31
  def mappings(name)
26
- {
27
- properties: {
28
- "#{name}.tokens": {
29
- type: "sparse_vector"
30
- }
31
- }
32
+ {
33
+ "#{name}.tokens": {
34
+ type: "sparse_vector"
35
+ }
32
36
  }.as_json
33
37
  end
34
38
  end
@@ -1,5 +1,46 @@
1
1
  module Stretchy::Attributes::Type
2
- class String < Stretchy::Attributes::Type::Text # :nodoc:
2
+ # The String attribute type
3
+ #
4
+ # _alias for `:text`_
5
+ #
6
+ # This class is used to define a string attribute for a model. It provides support for the Elasticsearch text data type, which can hold text. In this library, the string type is an alias for the text type.
7
+ #
8
+ # ### Parameters
9
+ #
10
+ # - `type:` `:string`.
11
+ # - `options:` The Hash of options for the attribute.
12
+ # - `:analyzer:` The String analyzer to be used for the text field, both at index-time and at search-time. Defaults to the default index analyzer, or the standard analyzer.
13
+ # - `:eager_global_ordinals:` The Boolean indicating if global ordinals should be loaded eagerly on refresh. Defaults to false.
14
+ # - `:fielddata:` The Boolean indicating if the field can use in-memory fielddata for sorting, aggregations, or scripting. Defaults to false.
15
+ # - `:fielddata_frequency_filter:` The Hash of expert settings that determine which values to load in memory when fielddata is enabled.
16
+ # - `:fields:` The Hash of multi-fields, which allow the same string value to be indexed in multiple ways for different purposes. By default, a 'keyword' field is added. Set to false to disable.
17
+ # - `:index:` The Boolean indicating if the field should be searchable. Defaults to true.
18
+ # - `:index_options:` The String indicating what information should be stored in the index, for search and highlighting purposes. Defaults to 'positions'.
19
+ # - `:index_prefixes:` The Hash indicating if term prefixes of between 2 and 5 characters are indexed into a separate field.
20
+ # - `:index_phrases:` The Boolean indicating if two-term word combinations (shingles) are indexed into a separate field. Defaults to false.
21
+ # - `:norms:` The Boolean indicating if field-length should be taken into account when scoring queries. Defaults to true.
22
+ # - `:position_increment_gap:` The Integer indicating the number of fake term positions which should be inserted between each element of an array of strings. Defaults to 100.
23
+ # - `:store:` The Boolean indicating if the field value should be stored and retrievable separately from the _source field. Defaults to false.
24
+ # - `:search_analyzer:` The String analyzer that should be used at search time on the text field. Defaults to the analyzer setting.
25
+ # - `:search_quote_analyzer:` The String analyzer that should be used at search time when a phrase is encountered. Defaults to the search_analyzer setting.
26
+ # - `:similarity:` The String indicating which scoring algorithm or similarity should be used. Defaults to 'BM25'.
27
+ # - `:term_vector:` The String indicating if term vectors should be stored for the field. Defaults to 'no'.
28
+ # - `:meta:` The Hash of metadata about the field.
29
+ #
30
+ # ---
31
+ #
32
+ # ### Examples
33
+ #
34
+ # #### Define a string attribute
35
+ #
36
+ # ```ruby
37
+ # class MyModel < StretchyModel
38
+ # attribute :name, :string
39
+ # end
40
+ # ```
41
+ #
42
+ class String < Stretchy::Attributes::Type::Text
43
+
3
44
  def type
4
45
  :string
5
46
  end
@@ -1,37 +1,57 @@
1
1
  module Stretchy::Attributes::Type
2
- # Public: Defines a text attribute for the model. This field type is used for text strings.
3
- #
4
- # opts - The Hash options used to refine the attribute (default: {}):
5
- # :analyzer - The String analyzer to be used for the text field, both at index-time and at search-time. Defaults to the default index analyzer, or the standard analyzer.
6
- # :eager_global_ordinals - The Boolean indicating if global ordinals should be loaded eagerly on refresh. Defaults to false.
7
- # :fielddata - The Boolean indicating if the field can use in-memory fielddata for sorting, aggregations, or scripting. Defaults to false.
8
- # :fielddata_frequency_filter - The Hash of expert settings which allow to decide which values to load in memory when fielddata is enabled.
9
- # :fields - The Hash of multi-fields allow the same string value to be indexed in multiple ways for different purposes. By default, a 'keyword' field is added. Set to false to disable.
10
- # :index - The Boolean indicating if the field should be searchable. Defaults to true.
11
- # :index_options - The String indicating what information should be stored in the index, for search and highlighting purposes. Defaults to 'positions'.
12
- # :index_prefixes - The Hash indicating if term prefixes of between 2 and 5 characters are indexed into a separate field.
13
- # :index_phrases - The Boolean indicating if two-term word combinations (shingles) are indexed into a separate field. Defaults to false.
14
- # :norms - The Boolean indicating if field-length should be taken into account when scoring queries. Defaults to true.
15
- # :position_increment_gap - The Integer indicating the number of fake term position which should be inserted between each element of an array of strings. Defaults to 100.
16
- # :store - The Boolean indicating if the field value should be stored and retrievable separately from the _source field. Defaults to false.
17
- # :search_analyzer - The String analyzer that should be used at search time on the text field. Defaults to the analyzer setting.
18
- # :search_quote_analyzer - The String analyzer that should be used at search time when a phrase is encountered. Defaults to the search_analyzer setting.
19
- # :similarity - The String indicating which scoring algorithm or similarity should be used. Defaults to 'BM25'.
20
- # :term_vector - The String indicating if term vectors should be stored for the field. Defaults to 'no'.
21
- # :meta - The Hash of metadata about the field.
22
- #
23
- #
24
- # Examples
25
- #
26
- # class MyModel
27
- # include StretchyModel
2
+ # The Text attribute type
3
+ #
4
+ # This class is used to define a text attribute for a model. It provides support for the Elasticsearch text data type, which can hold text strings.
5
+ #
6
+ # >[!NOTE]
7
+ # >
8
+ # > By default, the `:text` type gets a keyword multi-field unless `fields:` is specified (including when it is explicitly set to false).
9
+ # > This can be disabled by setting `Stretchy.configuration.add_keyword_field_to_text_attributes` to false.
10
+ # > The default keyword field name is `:keyword`, but this can be changed by setting `Stretchy.configuration.default_keyword_field`.
11
+ #
12
+ # ### Parameters
13
+ #
14
+ # - `type:` `:text`.
15
+ # - `options:` The Hash of options for the attribute.
16
+ # - `:analyzer:` The String analyzer to be used for the text field, both at index-time and at search-time. Defaults to the default index analyzer, or the standard analyzer.
17
+ # - `:eager_global_ordinals:` The Boolean indicating if global ordinals should be loaded eagerly on refresh. Defaults to false.
18
+ # - `:fielddata:` The Boolean indicating if the field can use in-memory fielddata for sorting, aggregations, or scripting. Defaults to false.
19
+ # - `:fielddata_frequency_filter:` The Hash of expert settings that determine which values to load in memory when fielddata is enabled.
20
+ # - `:fields:` The Hash of multi-fields, which allow the same string value to be indexed in multiple ways for different purposes. By default, a 'keyword' field is added. Set to false to disable.
21
+ # - `:index:` The Boolean indicating if the field should be searchable. Defaults to true.
22
+ # - `:index_options:` The String indicating what information should be stored in the index, for search and highlighting purposes. Defaults to 'positions'.
23
+ # - `:index_prefixes:` The Hash indicating if term prefixes of between 2 and 5 characters are indexed into a separate field.
24
+ # - `:index_phrases:` The Boolean indicating if two-term word combinations (shingles) are indexed into a separate field. Defaults to false.
25
+ # - `:norms:` The Boolean indicating if field-length should be taken into account when scoring queries. Defaults to true.
26
+ # - `:position_increment_gap:` The Integer indicating the number of fake term positions which should be inserted between each element of an array of strings. Defaults to 100.
27
+ # - `:store:` The Boolean indicating if the field value should be stored and retrievable separately from the _source field. Defaults to false.
28
+ # - `:search_analyzer:` The String analyzer that should be used at search time on the text field. Defaults to the analyzer setting.
29
+ # - `:search_quote_analyzer:` The String analyzer that should be used at search time when a phrase is encountered. Defaults to the search_analyzer setting.
30
+ # - `:similarity:` The String indicating which scoring algorithm or similarity should be used. Defaults to 'BM25'.
31
+ # - `:term_vector:` The String indicating if term vectors should be stored for the field. Defaults to 'no'.
32
+ # - `:meta:` The Hash of metadata about the field.
33
+ #
34
+ # ---
35
+ #
36
+ # ### Examples
37
+ #
38
+ # #### Define a text attribute
39
+ #
40
+ # ```ruby
41
+ # class MyModel < StretchyModel
28
42
  # attribute :description, :text, analyzer: 'english'
29
43
  # end
44
+ # ```
30
45
  #
31
- # Returns nothing.
32
46
  class Text < Stretchy::Attributes::Type::Base
33
47
  OPTIONS = [:analyzer, :eager_global_ordinals, :fielddata, :fielddata_frequency_filter, :fields, :index, :index_options, :index_prefixes, :index_phrases, :norms, :position_increment_gap, :store, :search_analyzer, :search_quote_analyzer, :similarity, :term_vector, :meta]
34
-
48
+
49
+ def initialize(**args)
50
+ # Add a keyword field by default if no fields are specified
51
+ args.reverse_merge!(fields: {keyword: {type: :keyword, ignore_above: 256}}) if args[:fields].nil? && Stretchy.configuration.add_keyword_field_to_text_attributes
52
+ super
53
+ end
54
+
35
55
  def type
36
56
  :text
37
57
  end
@@ -40,6 +60,11 @@ module Stretchy::Attributes::Type
40
60
  :text
41
61
  end
42
62
 
63
+ # The default for the `:text` type is to have a keyword field if no fields are specified.
64
+ def keyword_field?
65
+ fields.find { |k,d| d[:type].to_sym == :keyword}.present?
66
+ end
67
+
43
68
  def mappings(name)
44
69
  options = {type: type_for_database}
45
70
  OPTIONS.each { |option| options[option] = send(option) unless send(option).nil? }
@@ -1,21 +1,31 @@
1
1
  module Stretchy::Attributes::Type
2
- # Public: Defines a token_count attribute for the model. This field type is used for counting the number of tokens in a string.
2
+ # The TokenCount attribute type
3
3
  #
4
- # opts - The Hash options used to refine the attribute (default: {}):
5
- # :analyzer - The String analyzer to be used to analyze the string value. Required.
6
- # :enable_position_increments - The Boolean indicating if position increments should be counted. Defaults to true.
7
- # :doc_values - The Boolean indicating if the field should be stored on disk in a column-stride fashion. Defaults to true.
8
- # :index - The Boolean indicating if the field should be searchable. Defaults to true.
9
- # :null_value - The Numeric value to be substituted for any explicit null values. Defaults to null.
10
- # :store - The Boolean indicating if the field value should be stored and retrievable separately from the _source field. Defaults to false.
4
+ # This class is used to define a token_count attribute for a model. It provides support for the Elasticsearch token_count data type, which counts the number of tokens in a string.
11
5
  #
12
- # Examples
6
+ # ### Parameters
13
7
  #
14
- # class MyModel < Stretchy::Record
8
+ # - `type:` `:token_count`.
9
+ # - `options:` The Hash of options for the attribute.
10
+ # - `:analyzer:` The String analyzer to be used to analyze the string value. Required.
11
+ # - `:enable_position_increments:` The Boolean indicating if position increments should be counted. Defaults to true.
12
+ # - `:doc_values:` The Boolean indicating if the field should be stored on disk in a column-stride fashion. Defaults to true.
13
+ # - `:index:` The Boolean indicating if the field should be searchable. Defaults to true.
14
+ # - `:null_value:` The Numeric value to be substituted for any explicit null values. Defaults to null.
15
+ # - `:store:` The Boolean indicating if the field value should be stored and retrievable separately from the _source field. Defaults to false.
16
+ #
17
+ # ---
18
+ #
19
+ # ### Examples
20
+ #
21
+ # #### Define a token_count attribute
22
+ #
23
+ # ```ruby
24
+ # class MyModel < StretchyModel
15
25
  # attribute :description_token_count, :token_count, analyzer: 'standard'
16
26
  # end
27
+ # ```
17
28
  #
18
- # Returns nothing.
19
29
  class TokenCount < Stretchy::Attributes::Type::Base
20
30
  OPTIONS = [:analyzer, :enable_position_increments, :doc_values, :index, :null_value, :store]
21
31
 
@@ -1,16 +1,26 @@
1
1
  module Stretchy::Attributes::Type
2
- # Public: Defines a version attribute for the model. This field type is used for software versions following the Semantic Versioning rules.
2
+ # The Version attribute type
3
3
  #
4
- # opts - The Hash options used to refine the attribute (default: {}):
5
- # :meta - The Hash of metadata about the field.
4
+ # This class is used to define a version attribute for a model. This field type is used for software versions following the Semantic Versioning rules.
6
5
  #
7
- # Examples
6
+ # ### Parameters
8
7
  #
9
- # class MyModel < Stretchy::Record
8
+ # - `type:` `:version`.
9
+ # - `options:` The Hash of options for the attribute.
10
+ # - `:meta:` The Hash of metadata about the field.
11
+ #
12
+ # ---
13
+ #
14
+ # ### Examples
15
+ #
16
+ # #### Define a version attribute
17
+ #
18
+ # ```ruby
19
+ # class MyModel < StretchyModel
10
20
  # attribute :software_version, :version
11
21
  # end
22
+ # ```
12
23
  #
13
- # Returns nothing.
14
24
  class Version < Stretchy::Attributes::Type::Base
15
25
  OPTIONS = [:meta]
16
26
 
@@ -1,33 +1,44 @@
1
1
  module Stretchy::Attributes::Type
2
- # Public: Defines a wildcard attribute for the model. This field type is a specialization of the keyword field, but it supports wildcard searches.
3
- #
4
- # opts - The Hash options used to refine the attribute (default: {}):
5
- # :doc_values - The Boolean indicating if the field should be stored on disk in a column-stride fashion. Defaults to true.
6
- # :eager_global_ordinals - The Boolean indicating if global ordinals should be loaded eagerly on refresh. Defaults to false.
7
- # :fields - The Hash of multi-fields for the same string value to be indexed in multiple ways.
8
- # :ignore_above - The Integer limit for the length of the string. Strings longer than this limit will not be indexed. Defaults to 2147483647.
9
- # :index - The Boolean indicating if the field should be quickly searchable. Defaults to true.
10
- # :index_options - The String indicating what information should be stored in the index for scoring purposes. Defaults to 'docs'.
11
- # :meta - The Hash metadata about the field.
12
- # :norms - The Boolean indicating if field-length should be taken into account when scoring queries. Defaults to false.
13
- # :null_value - The String value to be substituted for any explicit null values. Defaults to null.
14
- # :on_script_error - The String defining what to do if the script defined by the :script parameter throws an error at indexing time. Can be 'fail' or 'continue'.
15
- # :script - The String script that will index values generated by this script, rather than reading the values directly from the source.
16
- # :store - The Boolean indicating if the field value should be stored and retrievable separately from the _source field. Defaults to false.
17
- # :similarity - The String scoring algorithm or similarity to be used. Defaults to 'BM25'.
18
- # :normalizer - The String pre-processor for the keyword prior to indexing. Defaults to null.
19
- # :split_queries_on_whitespace - The Boolean indicating if full text queries should split the input on whitespace. Defaults to false.
20
- # :time_series_dimension - The Boolean indicating if the field is a time series dimension. Defaults to false.
21
- #
22
- # Examples
23
- #
24
- # class MyModel
25
- # include StretchyModel
2
+ # The Wildcard attribute type
3
+ #
4
+ # This class is used to define a wildcard attribute for a model. This field type is a specialization of the keyword field, but it supports wildcard searches.
5
+ #
6
+ # ### Parameters
7
+ #
8
+ # - `type:` `:wildcard`.
9
+ # - `options:` The Hash of options for the attribute.
10
+ # - `:doc_values:` The Boolean indicating if the field should be stored on disk in a column-stride fashion. Defaults to true.
11
+ # - `:eager_global_ordinals:` The Boolean indicating if global ordinals should be loaded eagerly on refresh. Defaults to false.
12
+ # - `:fields:` The Hash of multi-fields for the same string value to be indexed in multiple ways.
13
+ # - `:ignore_above:` The Integer limit for the length of the string. Strings longer than this limit will not be indexed. Defaults to 2147483647.
14
+ # - `:index:` The Boolean indicating if the field should be quickly searchable. Defaults to true.
15
+ # - `:index_options:` The String indicating what information should be stored in the index for scoring purposes. Defaults to 'docs'.
16
+ # - `:meta:` The Hash of metadata about the field.
17
+ # - `:norms:` The Boolean indicating if field-length should be taken into account when scoring queries. Defaults to false.
18
+ # - `:null_value:` The String value to be substituted for any explicit null values. Defaults to null.
19
+ # - `:on_script_error:` The String defining what to do if the script defined by the :script parameter throws an error at indexing time. Can be 'fail' or 'continue'.
20
+ # - `:script:` The String script that will index values generated by this script, rather than reading the values directly from the source.
21
+ # - `:store:` The Boolean indicating if the field value should be stored and retrievable separately from the _source field. Defaults to false.
22
+ # - `:similarity:` The String scoring algorithm or similarity to be used. Defaults to 'BM25'.
23
+ # - `:normalizer:` The String pre-processor for the keyword prior to indexing. Defaults to null.
24
+ # - `:split_queries_on_whitespace:` The Boolean indicating if full text queries should split the input on whitespace. Defaults to false.
25
+ # - `:time_series_dimension:` The Boolean indicating if the field is a time series dimension. Defaults to false.
26
+ #
27
+ # ---
28
+ #
29
+ # ### Examples
30
+ #
31
+ # #### Define a wildcard attribute
32
+ #
33
+ # ```ruby
34
+ # class MyModel < StretchyModel
26
35
  # attribute :description_wildcard, :wildcard
27
36
  # end
37
+ # ```
28
38
  #
29
- # Returns nothing.
30
39
  class Wildcard < Stretchy::Attributes::Type::Keyword
40
+ OPTIONS = [:doc_values, :eager_global_ordinals, :fields, :ignore_above, :index, :index_options, :meta, :norms, :null_value, :on_script_error, :script, :store, :similarity, :normalizer, :split_queries_on_whitespace, :time_series_dimension]
41
+ attr_reader *OPTIONS
31
42
  def type
32
43
  :wildcard
33
44
  end
@@ -1,4 +1,32 @@
1
1
  module Stretchy
2
+ # This module is used to define and manage the attributes of a model in Stretchy. It provides methods for getting and setting attribute values, inspecting the model, and registering attribute types.
3
+ #
4
+ # ### Methods
5
+ #
6
+ # - `[](attribute)`: Retrieves the value of the specified attribute.
7
+ # - `[]=(attribute, value)`: Sets the value of the specified attribute.
8
+ # - `inspect`: Returns a string representation of the model, including its class name and attributes.
9
+ # - `self.inspect`: Returns a string representation of the model class, including its name and attribute types.
10
+ # - `attribute_mappings`: Returns a JSON representation of the attribute mappings for the model.
11
+ # - `self.register!`: Registers the attribute types with ActiveModel.
12
+ #
13
+ # ### Example
14
+ #
15
+ # In this example, the `Attributes` module is used to define an attribute for `MyModel`, get and set the attribute value, inspect the model and the model class, and get the attribute mappings.
16
+ #
17
+ # ```ruby
18
+ # class MyModel < Stretchy::Record
19
+ # attribute :title, :string
20
+ # end
21
+ #
22
+ # model = MyModel.new(title: "hello")
23
+ # model[:title] # => "hello"
24
+ # model.inspect # => "#<MyModel title: hello>"
25
+ # MyModel.inspect # => "#<MyModel title: string>"
26
+ # MyModel.attribute_mappings # => {properties: {title: {type: "string"}}}
27
+ # ```
28
+ #
29
+ #
2
30
  module Attributes
3
31
  extend ActiveSupport::Concern
4
32
 
@@ -38,6 +66,7 @@ module Stretchy
38
66
  ActiveModel::Type.register(:ip, Stretchy::Attributes::Type::IP)
39
67
  ActiveModel::Type.register(:join, Stretchy::Attributes::Type::Join)
40
68
  ActiveModel::Type.register(:keyword, Stretchy::Attributes::Type::Keyword)
69
+ ActiveModel::Type.register(:knn_vector, Stretchy::Attributes::Type::KnnVector)
41
70
  ActiveModel::Type.register(:match_only_text, Stretchy::Attributes::Type::MatchOnlyText)
42
71
  ActiveModel::Type.register(:nested, Stretchy::Attributes::Type::Nested)
43
72
  ActiveModel::Type.register(:percolator, Stretchy::Attributes::Type::Percolator)
@@ -18,6 +18,33 @@ module Stretchy
18
18
  :count,
19
19
  to: :gateway
20
20
 
21
+ # This method is used to set or retrieve the index name for the Elasticsearch index.
22
+ #
23
+ # ### Parameters
24
+ #
25
+ # - `name:` (String, nil) - The name to set for the index. If nil, the method will act as a getter.
26
+ # - `&block:` A block that returns the index name when called.
27
+ #
28
+ # ### Returns
29
+ #
30
+ # - (String) - The index name.
31
+ #
32
+ # ### Behavior
33
+ #
34
+ # - If a name or block is provided, it sets the index name to the provided name or block.
35
+ # - If no argument is provided, it retrieves the index name.
36
+ # - If the index name is callable (e.g., a Proc), it calls the block.
37
+ # - If the index name is not set, it defaults to the parameterized and underscored collection name of the base class model.
38
+ #
39
+ # ### Example
40
+ #
41
+ # In this example, the index name for instances of `MyModel` will be "my_custom_index" instead of "my_models".
42
+ #
43
+ # ```ruby
44
+ # class MyModel < Stretchy::Record
45
+ # index_name "my_custom_index"
46
+ # end
47
+ # ```
21
48
  def index_name(name=nil, &block)
22
49
  if name || block_given?
23
50
  return (@index_name = name || block)
@@ -30,6 +57,30 @@ module Stretchy
30
57
  end
31
58
  end
32
59
 
60
+ # This method is used to set or retrieve the settings for the Elasticsearch index.
61
+ #
62
+ # ### Parameters
63
+ #
64
+ # - `settings:` (Hash) - The settings to set for the index. If empty, the method will act as a getter.
65
+ #
66
+ # ### Returns
67
+ #
68
+ # - (Hash) - The index settings.
69
+ #
70
+ # ### Behavior
71
+ #
72
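+ # - If settings are provided and no settings have been stored yet, it sets the index settings to the provided settings (the assignment uses `||=`, so previously stored settings are kept).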
73
+ # - If no argument is provided, it retrieves the index settings.
74
+ # - If the `default_pipeline` is set, it merges it into the index settings.
75
+ #
76
+ # ### Example
77
+ #
78
+ # ```ruby
79
+ # class MyModel < Stretchy::Record
80
+ # index_settings number_of_shards: 5, number_of_replicas: 1
81
+ # end
82
+ # ```
83
+ # In this example, the index settings for instances of `MyModel` will have 5 shards and 1 replica.
33
84
  def index_settings(settings={})
34
85
  @index_settings ||= settings
35
86
  @index_settings.merge!(default_pipeline: default_pipeline.to_s) if default_pipeline
@@ -40,6 +91,33 @@ module Stretchy
40
91
  @gateway = nil
41
92
  end
42
93
 
94
+ # This method is used to access the underlying `Stretchy::Repository` for the model. It creates the repository if it doesn't exist, and reuses it if it does.
95
+ #
96
+ # ### Parameters
97
+ #
98
+ # - `&block:` (optional) A block that is evaluated in the context of the repository. This can be used to perform operations on the repository.
99
+ #
100
+ # ### Returns
101
+ #
102
+ # - (Stretchy::Repository) - The repository for the model.
103
+ #
104
+ # ### Behavior
105
+ #
106
+ # - If the repository doesn't exist or if the client of the existing repository is not the current client from the Stretchy configuration,
107
+ # it creates a new repository with the current `client`, `index_name`, `class`, `mapping`, and `settings`.
108
+ # - If a block is given, it is evaluated in the context of the repository.
109
+ # - It always returns the repository.
110
+ #
111
+ # ### Example
112
+ #
113
+ # ```ruby
114
+ # class MyModel < Stretchy::Record
115
+ # gateway do
116
+ # # Perform operations on the repository
117
+ # end
118
+ # end
119
+ # ```
120
+ #
43
121
  def gateway(&block)
44
122
  reload_gateway_configuration! if @gateway && @gateway.client != Stretchy.configuration.client
45
123
 
@@ -0,0 +1,94 @@
1
+ module Stretchy
2
+ # This class is used to define settings for an Elasticsearch index.
3
+ # It provides methods to define analyzers, filters, tokenizers, and normalizers.
4
+ #
5
+ # ## Usage
6
+ # ```ruby
7
+ # class MyIndexSetting < Stretchy::IndexSetting
8
+ # analyzer :default,
9
+ # filter: [:lowercase, :asciifolding],
10
+ # tokenizer: :standard
11
+ #
12
+ # filter :my_stemmer,
13
+ # type: :stemmer,
14
+ # name: :light_english
15
+ #
16
+ # tokenizer :path_tokenizer,
17
+ # type: :path_hierarchy,
18
+ # reverse: true
19
+ #
20
+ # normalizer :my_normalizer,
21
+ # type: :custom,
22
+ # filter: [:lowercase]
23
+ # end
24
+ # ```
25
+ #
26
+ # In this example, we define a custom index setting `MyIndexSetting` that includes a default analyzer, a custom filter, a custom tokenizer, and a custom normalizer.
27
+ #
28
+ # ## Methods
29
+ # - `analyzer(name, options)`: Defines an analyzer with the given name and options.
30
+ # - `filter(name, options)`: Defines a filter with the given name and options.
31
+ # - `tokenizer(name, options)`: Defines a tokenizer with the given name and options.
32
+ # - `normalizer(name, options)`: Defines a normalizer with the given name and options.
33
+ #
34
+ # Each of these methods takes a name as the first argument and a hash of options as the second argument. The options will depend on the specific type of analyzer, filter, tokenizer, or normalizer being defined.
35
+ #
36
+ # ### Accessing Defined Settings
37
+ # You can access the settings defined in an `IndexSetting` subclass using the `analyzers`, `filters`, `tokenizers`, and `normalizers` methods. These methods return a hash of the defined settings for their respective type.
38
+ #
39
+ # ```ruby
40
+ # MyIndexSetting.analyzers
41
+ # # => { default: { filter: [:lowercase, :asciifolding], tokenizer: :standard } }
42
+ # ```
43
+ #
44
+ # ```ruby
45
+ # MyIndexSetting.filters
46
+ # # => { my_stemmer: { type: :stemmer, name: :light_english } }
47
+ # ```
48
+ #
49
+ # ```ruby
50
+ # MyIndexSetting.tokenizers
51
+ # # => { path_tokenizer: { type: :path_hierarchy, reverse: true } }
52
+ # ```
53
+ #
54
+ # ```ruby
55
+ # MyIndexSetting.normalizers
56
+ # # => { my_normalizer: { type: :custom, filter: [:lowercase] } }
57
+ # ```
58
+ # ### Using the Settings
59
+ # You can use the settings defined in an `IndexSetting` subclass to configure the settings for a `StretchyModel`.
60
+ #
61
+ # ```ruby
62
+ # class MyModel < StretchyModel
63
+ # index_settings(MyIndexSetting.as_json)
64
+ # end
65
+ # ```
66
+ #
67
+ class IndexSetting
68
+ class << self
69
+ METHODS = [:analyzer, :filter, :tokenizer, :normalizer]
70
+
71
+ def settings
72
+ @settings ||= {}
73
+ end
74
+
75
+ def as_json
76
+ {
77
+ self.name.demodulize.underscore.to_sym => settings
78
+ }
79
+ end
80
+
81
+ METHODS.each do |method|
82
+ define_method(method) do |*args|
83
+ settings[method] ||= {}
84
+ settings[method][args.shift] = Hash[*args] unless args.empty?
85
+ end
86
+
87
+ define_method("#{method}s") do
88
+ settings[method] || {}
89
+ end
90
+ end
91
+
92
+ end
93
+ end
94
+ end