picky 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. data/bin/picky +14 -0
  2. data/lib/bundling.rb +10 -0
  3. data/lib/constants.rb +9 -0
  4. data/lib/deployment.rb +212 -0
  5. data/lib/picky/application.rb +40 -0
  6. data/lib/picky/cacher/convenience.rb +3 -0
  7. data/lib/picky/cacher/generator.rb +17 -0
  8. data/lib/picky/cacher/partial/default.rb +7 -0
  9. data/lib/picky/cacher/partial/none.rb +19 -0
  10. data/lib/picky/cacher/partial/strategy.rb +7 -0
  11. data/lib/picky/cacher/partial/subtoken.rb +91 -0
  12. data/lib/picky/cacher/partial_generator.rb +15 -0
  13. data/lib/picky/cacher/similarity/default.rb +7 -0
  14. data/lib/picky/cacher/similarity/double_levenshtone.rb +73 -0
  15. data/lib/picky/cacher/similarity/none.rb +25 -0
  16. data/lib/picky/cacher/similarity/strategy.rb +7 -0
  17. data/lib/picky/cacher/similarity_generator.rb +15 -0
  18. data/lib/picky/cacher/weights/default.rb +7 -0
  19. data/lib/picky/cacher/weights/logarithmic.rb +39 -0
  20. data/lib/picky/cacher/weights/strategy.rb +7 -0
  21. data/lib/picky/cacher/weights_generator.rb +15 -0
  22. data/lib/picky/configuration/configuration.rb +13 -0
  23. data/lib/picky/configuration/field.rb +68 -0
  24. data/lib/picky/configuration/indexes.rb +60 -0
  25. data/lib/picky/configuration/queries.rb +32 -0
  26. data/lib/picky/configuration/type.rb +52 -0
  27. data/lib/picky/cores.rb +101 -0
  28. data/lib/picky/db/configuration.rb +23 -0
  29. data/lib/picky/ext/ruby19/extconf.rb +7 -0
  30. data/lib/picky/ext/ruby19/performant.c +339 -0
  31. data/lib/picky/extensions/array.rb +45 -0
  32. data/lib/picky/extensions/hash.rb +11 -0
  33. data/lib/picky/extensions/module.rb +15 -0
  34. data/lib/picky/extensions/symbol.rb +18 -0
  35. data/lib/picky/generator.rb +156 -0
  36. data/lib/picky/helpers/cache.rb +23 -0
  37. data/lib/picky/helpers/gc.rb +11 -0
  38. data/lib/picky/helpers/measuring.rb +45 -0
  39. data/lib/picky/helpers/search.rb +27 -0
  40. data/lib/picky/index/bundle.rb +328 -0
  41. data/lib/picky/index/category.rb +109 -0
  42. data/lib/picky/index/combined.rb +38 -0
  43. data/lib/picky/index/type.rb +30 -0
  44. data/lib/picky/indexers/base.rb +77 -0
  45. data/lib/picky/indexers/default.rb +3 -0
  46. data/lib/picky/indexers/field.rb +13 -0
  47. data/lib/picky/indexers/no_source_specified_error.rb +5 -0
  48. data/lib/picky/indexers/solr.rb +60 -0
  49. data/lib/picky/indexes.rb +180 -0
  50. data/lib/picky/initializers/ext.rb +6 -0
  51. data/lib/picky/initializers/mysql.rb +22 -0
  52. data/lib/picky/loader.rb +287 -0
  53. data/lib/picky/loggers/search.rb +19 -0
  54. data/lib/picky/performant/array.rb +23 -0
  55. data/lib/picky/query/allocation.rb +82 -0
  56. data/lib/picky/query/allocations.rb +131 -0
  57. data/lib/picky/query/base.rb +124 -0
  58. data/lib/picky/query/combination.rb +69 -0
  59. data/lib/picky/query/combinations.rb +106 -0
  60. data/lib/picky/query/combinator.rb +92 -0
  61. data/lib/picky/query/full.rb +15 -0
  62. data/lib/picky/query/live.rb +22 -0
  63. data/lib/picky/query/qualifiers.rb +73 -0
  64. data/lib/picky/query/solr.rb +77 -0
  65. data/lib/picky/query/token.rb +215 -0
  66. data/lib/picky/query/tokens.rb +102 -0
  67. data/lib/picky/query/weigher.rb +159 -0
  68. data/lib/picky/query/weights.rb +55 -0
  69. data/lib/picky/rack/harakiri.rb +37 -0
  70. data/lib/picky/results/base.rb +103 -0
  71. data/lib/picky/results/full.rb +19 -0
  72. data/lib/picky/results/live.rb +19 -0
  73. data/lib/picky/routing.rb +165 -0
  74. data/lib/picky/signals.rb +11 -0
  75. data/lib/picky/solr/schema_generator.rb +73 -0
  76. data/lib/picky/sources/base.rb +19 -0
  77. data/lib/picky/sources/csv.rb +30 -0
  78. data/lib/picky/sources/db.rb +77 -0
  79. data/lib/picky/tokenizers/base.rb +130 -0
  80. data/lib/picky/tokenizers/default.rb +3 -0
  81. data/lib/picky/tokenizers/index.rb +73 -0
  82. data/lib/picky/tokenizers/query.rb +70 -0
  83. data/lib/picky/umlaut_substituter.rb +21 -0
  84. data/lib/picky-tasks.rb +6 -0
  85. data/lib/picky.rb +18 -0
  86. data/lib/tasks/application.rake +5 -0
  87. data/lib/tasks/cache.rake +53 -0
  88. data/lib/tasks/framework.rake +4 -0
  89. data/lib/tasks/index.rake +29 -0
  90. data/lib/tasks/server.rake +48 -0
  91. data/lib/tasks/shortcuts.rake +13 -0
  92. data/lib/tasks/solr.rake +36 -0
  93. data/lib/tasks/spec.rake +11 -0
  94. data/lib/tasks/statistics.rake +13 -0
  95. data/lib/tasks/try.rake +29 -0
  96. data/prototype_project/Gemfile +23 -0
  97. data/prototype_project/Rakefile +1 -0
  98. data/prototype_project/app/README +6 -0
  99. data/prototype_project/app/application.rb +50 -0
  100. data/prototype_project/app/application.ru +29 -0
  101. data/prototype_project/app/db.yml +10 -0
  102. data/prototype_project/app/logging.rb +20 -0
  103. data/prototype_project/app/unicorn.ru +10 -0
  104. data/prototype_project/log/README +1 -0
  105. data/prototype_project/script/console +34 -0
  106. data/prototype_project/tmp/README +0 -0
  107. data/prototype_project/tmp/pids/README +0 -0
  108. data/spec/ext/performant_spec.rb +64 -0
  109. data/spec/lib/application_spec.rb +61 -0
  110. data/spec/lib/cacher/partial/subtoken_spec.rb +89 -0
  111. data/spec/lib/cacher/partial_generator_spec.rb +35 -0
  112. data/spec/lib/cacher/similarity/double_levenshtone_spec.rb +60 -0
  113. data/spec/lib/cacher/similarity/none_spec.rb +23 -0
  114. data/spec/lib/cacher/similarity_generator_spec.rb +22 -0
  115. data/spec/lib/cacher/weights/logarithmic_spec.rb +30 -0
  116. data/spec/lib/cacher/weights_generator_spec.rb +21 -0
  117. data/spec/lib/configuration/configuration_spec.rb +38 -0
  118. data/spec/lib/configuration/type_spec.rb +49 -0
  119. data/spec/lib/configuration_spec.rb +8 -0
  120. data/spec/lib/cores_spec.rb +65 -0
  121. data/spec/lib/extensions/array_spec.rb +37 -0
  122. data/spec/lib/extensions/hash_spec.rb +11 -0
  123. data/spec/lib/extensions/module_spec.rb +27 -0
  124. data/spec/lib/extensions/symbol_spec.rb +85 -0
  125. data/spec/lib/generator_spec.rb +135 -0
  126. data/spec/lib/helpers/cache_spec.rb +35 -0
  127. data/spec/lib/helpers/gc_spec.rb +71 -0
  128. data/spec/lib/helpers/measuring_spec.rb +18 -0
  129. data/spec/lib/helpers/search_spec.rb +50 -0
  130. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +47 -0
  131. data/spec/lib/index/bundle_spec.rb +260 -0
  132. data/spec/lib/index/category_spec.rb +203 -0
  133. data/spec/lib/indexers/base_spec.rb +73 -0
  134. data/spec/lib/indexers/field_spec.rb +20 -0
  135. data/spec/lib/loader_spec.rb +48 -0
  136. data/spec/lib/loggers/search_spec.rb +19 -0
  137. data/spec/lib/performant/array_spec.rb +13 -0
  138. data/spec/lib/query/allocation_spec.rb +194 -0
  139. data/spec/lib/query/allocations_spec.rb +336 -0
  140. data/spec/lib/query/base_spec.rb +104 -0
  141. data/spec/lib/query/combination_spec.rb +90 -0
  142. data/spec/lib/query/combinations_spec.rb +83 -0
  143. data/spec/lib/query/combinator_spec.rb +112 -0
  144. data/spec/lib/query/full_spec.rb +22 -0
  145. data/spec/lib/query/live_spec.rb +61 -0
  146. data/spec/lib/query/qualifiers_spec.rb +31 -0
  147. data/spec/lib/query/solr_spec.rb +51 -0
  148. data/spec/lib/query/token_spec.rb +297 -0
  149. data/spec/lib/query/tokens_spec.rb +189 -0
  150. data/spec/lib/query/weights_spec.rb +47 -0
  151. data/spec/lib/results/base_spec.rb +233 -0
  152. data/spec/lib/routing_spec.rb +318 -0
  153. data/spec/lib/solr/schema_generator_spec.rb +42 -0
  154. data/spec/lib/sources/db_spec.rb +91 -0
  155. data/spec/lib/tokenizers/base_spec.rb +61 -0
  156. data/spec/lib/tokenizers/index_spec.rb +51 -0
  157. data/spec/lib/tokenizers/query_spec.rb +105 -0
  158. data/spec/lib/umlaut_substituter_spec.rb +84 -0
  159. data/spec/specific/speed_spec.rb +55 -0
  160. metadata +371 -15
  161. data/README.textile +0 -9
@@ -0,0 +1,68 @@
module Configuration

  # Configuration of a single field of an index type.
  #
  # Holds the field's name, the indexer and tokenizer classes to use for it,
  # and the remaining options, which are handed on to Index::Category when
  # the field is generated.
  #
  class Field
    attr_reader :name, :indexed_name, :virtual
    attr_accessor :type # convenience

    # name    - the name of the field.
    # options - recognised keys, which are consumed here:
    #             :indexer       - indexer class (default: Indexers::Default)
    #             :tokenizer     - tokenizer class (default: Tokenizers::Index)
    #             :indexed_field - name to index under (default: name)
    #             :virtual       - whether the field is virtual (default: false)
    #           :qualifiers, if given, is registered with Query::Qualifiers.
    #           Everything not consumed is kept and passed to Index::Category.
    #
    def initialize name, options = {}
      @name = name

      # Work on a copy. The keys below are deleted from the hash, and the
      # caller may reuse the same options hash for several fields.
      #
      options = options.dup

      # TODO add source as option

      @indexer_class   = options.delete(:indexer)   || Indexers::Default
      @tokenizer_class = options.delete(:tokenizer) || Tokenizers::Index # Default

      @indexed_name = options.delete(:indexed_field) || name # TODO Rename to indexed_as?
      @virtual      = options.delete(:virtual)       || false

      Query::Qualifiers.add(name, options[:qualifiers]) if options[:qualifiers]

      # @remove = options[:remove] || false
      # @filter = options[:filter] || true

      @options = options
    end

    # The source of this field. @source is never assigned in this class,
    # so this currently always falls back to the type's source.
    #
    def source
      @source || type.source
    end

    # Builds the Index::Category for this field from the remaining options.
    #
    def generate
      Index::Category.new self.name, type, @options
    end

    # TODO Duplicate code in bundle. Move to application.
    #
    # TODO Move to type, and use in bundle from there.
    #
    def search_index_root
      File.join SEARCH_ROOT, 'index'
    end

    # Directory for this field's type: <index root>/<environment>/<type name>.
    #
    # TODO Move to config. Duplicate Code in field.rb.
    #
    def cache_directory
      File.join search_index_root, SEARCH_ENVIRONMENT, type.name.to_s
    end

    # Path of the index text file: <cache directory>/<type>_<field>_index.txt.
    #
    def search_index_file_name
      File.join cache_directory, "#{type.name}_#{name}_index.txt"
    end

    # Makes sure the cache directory exists, then runs the indexer.
    #
    def index
      prepare_cache_directory
      indexer.index
    end

    # Creates the cache directory, including missing parent directories.
    #
    def prepare_cache_directory
      FileUtils.mkdir_p cache_directory
    end

    # Makes sure the cache directory exists, then generates the caches
    # of a freshly generated category.
    #
    def cache
      prepare_cache_directory
      generate.generate_caches
    end

    # Memoized indexer instance, built from the configured indexer class.
    #
    def indexer
      @indexer ||= @indexer_class.new(type, self)
    end

    # Memoized tokenizer instance, built from the configured tokenizer class.
    #
    def tokenizer
      @tokenizer ||= @tokenizer_class.new # TODO Make instances.
    end

    # True if the field was configured as virtual.
    #
    def virtual?
      !!virtual
    end
  end

end
@@ -0,0 +1,60 @@
module Configuration

  # Holds the configured index types and runs snapshot / index / solr
  # operations over all of them, or over a named subset.
  #
  class Indexes

    attr_reader :types

    # types - the initially configured types (may be empty).
    #
    def initialize *types
      @types = types
    end

    # The tokenizer class that the tokenizing options below are delegated to.
    #
    def default_index
      Tokenizers::Index
    end

    # Tokenizing options are forwarded to the default index tokenizer.
    #
    delegate :illegal_characters,
             :contract_expressions,
             :stopwords,
             :split_text_on,
             :normalize_words,
             :illegal_characters_after,
             :to => :default_index

    # Defines a new type and adds it to the list of types.
    #
    def type name, *fields
      types << Type.new(name, *fields)
    end

    # Creates a new field configuration.
    #
    def field name, options = {}
      Field.new name, options
    end

    # Takes a snapshot of the given types (all types if none are named).
    #
    def take_snapshot *type_names
      only_if_included_in(type_names, &:take_snapshot)
    end

    # Indexes the given types (all types if none are named).
    #
    def index *type_names
      only_if_included_in(type_names, &:index)
    end

    # Indexes the given types in Solr (all types if none are named).
    #
    def index_solr *type_names
      only_if_included_in(type_names, &:index_solr)
    end

    # Yields each configured type whose name is in type_names.
    # An empty list selects every type.
    #
    def only_if_included_in type_names = []
      wanted = type_names.empty? ? types.map(&:name) : type_names
      types.each do |type|
        yield type if wanted.include?(type.name)
      end
    end

  end

end
@@ -0,0 +1,32 @@
module Configuration

  # Query side configuration: forwards route definitions to the routing
  # object and tokenizing options to the query tokenizer.
  #
  class Queries

    attr_reader :routing

    # routing - the object that the route definitions are delegated to.
    #
    def initialize routing
      @routing = routing
    end

    # The tokenizer class that the tokenizing options below are delegated to.
    #
    def default_index
      Tokenizers::Query
    end

    # Routes.
    #
    delegate :defaults,
             :route,
             :live,
             :full,
             :root,
             :default,
             :to => :routing

    # Sets the maximum number of tokens on Query::Tokens.
    #
    def maximum_tokens amount
      Query::Tokens.maximum = amount
    end

    # Tokenizing options are forwarded to the query tokenizer.
    #
    delegate :illegal_characters,
             :contract_expressions,
             :stopwords,
             :split_text_on,
             :normalize_words,
             :illegal_characters_after,
             :to => :default_index

  end

end
@@ -0,0 +1,52 @@
module Configuration

  # Configuration of one index type: its name, its data source, its fields
  # and a few per-type options.
  #
  class Type
    attr_reader :name,
                :source,
                :fields,
                :after_indexing,
                :result_type,
                :ignore_unassigned_tokens,
                :solr

    # name   - the name of the type.
    # source - the data source of the type.
    # fields - any number of field configurations, optionally followed by an
    #          options Hash with the keys :after_indexing, :result_type
    #          (default: name), :ignore_unassigned_tokens (default: false)
    #          and :solr (default: nil).
    #
    # The options Hash may be left out, also when no fields are given.
    #
    def initialize name, source, *fields
      # A trailing Hash is the options; everything else is a field.
      #
      options = fields.last.is_a?(Hash) ? fields.pop : {}

      @name   = name
      @source = source

      # dup, if field is reused. TODO Rewrite.
      #
      @fields = fields.map do |field|
        copy = field.dup
        copy.type = self
        copy
      end

      @after_indexing           = options[:after_indexing]
      @result_type              = options[:result_type] || name
      @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
      @solr                     = options[:solr] || nil
    end

    # Generates the Index::Type, with one category per field.
    #
    def generate
      categories = fields.map { |field| field.generate }
      Index::Type.new name, result_type, ignore_unassigned_tokens, *categories
    end

    def table_name
      self # FIXME UGH, Remove anyway
    end

    # Asks the source to take a snapshot for this type.
    #
    def take_snapshot
      source.take_snapshot self
    end

    # Indexes each field in turn.
    #
    def index
      fields.each do |field|
        field.index
      end
    end

    # The non-virtual fields if solr is configured, otherwise none.
    #
    def solr_fields
      solr ? fields.select { |field| !field.virtual? } : []
    end

    # Runs the Solr indexer for this type. Does nothing unless solr is configured.
    #
    # TODO Delegate to Solr handler.
    #
    def index_solr
      return unless solr
      @indexer = Indexers::Solr.new self
      @indexer.index
    end
  end
end
@@ -0,0 +1,101 @@
Infinity = 1.0/0

# Handles processing over multiple cores.
#
class Cores

  # Pass it an ary or generator.
  #
  #   generator = (1..10).each
  #   forked generator, :max => 5 do |element|
  #
  #   end
  #
  # Each element is yielded to the block in a forked child process.
  #
  # Options include:
  #  * max:      Maximum # of processors to use. Default is all it can get.
  #  * amount:   Exact # of processes to use, overriding the detected cores.
  #  * randomly: Process the elements in random order.
  #
  # Note: A nil or false element is not yielded; it only ends the current
  #       ramp-up round.
  #
  def self.forked ary_or_generator, options = {}
    ary_or_generator = ary_or_generator.sort_by { rand } if options[:randomly]
    generator        = ary_or_generator.each

    # Get the maximum number of processors.
    #
    max = max_processors options
    currently_processing = 0

    loop do

      # Ramp it up to num processors.
      #
      while currently_processing < max
        element = begin
          generator.next
        rescue StopIteration
          nil
        end
        break unless element

        Process.fork do
          yield element
        end

        # Only count a slot as taken once a child has actually been forked.
        #
        currently_processing += 1
      end

      begin
        Process.wait 0 # Block and wait for a child to finish.
      rescue Errno::ECHILD
        break # No children left: we are done.
      else
        currently_processing -= 1
      end
    end
  end

  # Return the number of maximum usable processors.
  #
  # :amount wins if given, otherwise it is the number of cores,
  # capped by :max if given.
  #
  def self.max_processors options = {}
    options[:amount] || [number_of_cores, (options[:max] || Infinity)].min
  end

  # Gets the number of cores depending on OS.
  #
  def self.number_of_cores
    extract_cores_for actual_platform
  end

  # Extracts the platform os from the platform:
  # the run of lowercase letters following the first hyphen.
  #
  # Note: Could also use 'rbconfig'.
  #
  def self.actual_platform
    matched = platform.match(/-\b([a-z]*)/)
    matched && matched[1]
  end

  # Returns a mapping
  #   os_name => lambda_which_returns_a_number_of_cores
  #
  @@number_of_cores = {
    'darwin' => lambda { `system_profiler SPHardwareDataType | grep 'Total Number Of Cores'`.gsub(/[^\d]/, '') },
    'linux'  => lambda { `grep -ci ^processor /proc/cpuinfo` }
  }
  def self.os_to_core_mapping
    @@number_of_cores
  end

  # Extracts the number of cores for the given os name.
  #
  # Note: Default is 1. This also applies if the os is unknown or if the
  #       lookup does not yield a positive number (for example because the
  #       command printed nothing). Zero cores would make forked process
  #       nothing at all.
  #
  def self.extract_cores_for os
    code_to_execute = os_to_core_mapping[os]
    return 1 unless code_to_execute

    cores = code_to_execute.call.to_i
    cores > 0 ? cores : 1
  end

  def self.platform
    RUBY_PLATFORM
  end

end
@@ -0,0 +1,23 @@
# Abstract ActiveRecord base class that remembers its connection options
# and connects on demand.
#
class DB < ActiveRecord::Base

  self.abstract_class = true

  # Remembers the connection options.
  #
  # If options contains :file, the options are loaded from that YAML file
  # (relative to SEARCH_ROOT); otherwise the given options are used as is.
  #
  # Returns the class itself.
  #
  def self.configure options = {}
    filename = options[:file]
    @connection_options =
      if filename
        File.open(File.join(SEARCH_ROOT, filename)) { |io| YAML::load(io) }
      else
        options
      end
    self
  end

  # Establishes the connection with the remembered options.
  # Does nothing in the test environment.
  #
  def self.connect
    establish_connection @connection_options unless SEARCH_ENVIRONMENT.to_s == 'test'
  end

end
@@ -0,0 +1,7 @@
# Build configuration for the 'performant' C extension (Ruby 1.9 version).
#
puts 'Compiling with Ruby 1.9'
require 'mkmf'

# The extension cannot be built without the Ruby headers.
#
have_header('ruby.h') or abort('need ruby.h')

dir_config 'performant'
create_makefile 'performant'
@@ -0,0 +1,339 @@
1
+ // Note: This is the Ruby 1.9 version.
2
+ //
3
+ #include "ruby.h"
4
+
5
+ // Copying internal ruby methods.
6
+ //
7
+ static inline VALUE rb_ary_elt(ary, offset)
8
+ VALUE ary;
9
+ long offset;
10
+ {
11
+ if (RARRAY_LEN(ary) == 0) return Qnil;
12
+ if (offset < 0 || RARRAY_LEN(ary) <= offset) {
13
+ return Qnil;
14
+ }
15
+ return RARRAY_PTR(ary)[offset];
16
+ }
17
+ VALUE rb_ary_make_hash(VALUE, VALUE);
18
+ static VALUE ary_make_hash(ary1, ary2)
19
+ VALUE ary1, ary2;
20
+ {
21
+ VALUE hash = rb_hash_new();
22
+ long i;
23
+
24
+ for (i=0; i<RARRAY_LEN(ary1); i++) {
25
+ rb_hash_aset(hash, RARRAY_PTR(ary1)[i], Qtrue);
26
+ }
27
+ if (ary2) {
28
+ for (i=0; i<RARRAY_LEN(ary2); i++) {
29
+ rb_hash_aset(hash, RARRAY_PTR(ary2)[i], Qtrue);
30
+ }
31
+ }
32
+ return hash;
33
+ }
34
+
35
+ // Comparison functions.
36
+ //
37
+ inline int intvaluecmp(VALUE a, VALUE b) {
38
+ return FIX2INT(a) - FIX2INT(b);
39
+ }
// Three-way comparison of the two ints pointed to.
//
// Returns a negative, zero or positive int if *a is smaller than, equal to
// or larger than *b. Compares instead of subtracting, because the difference
// of two ints can overflow. It is static because a plain `inline` definition
// provides no external definition under C99 rules.
//
static inline int intcmp(const int * a, const int * b) {
  const int x = *a;
  const int y = *b;

  return (x > y) - (x < y);
}
// Three-way comparison of the two longs pointed to.
//
// Returns a negative, zero or positive value if *a is smaller than, equal to
// or larger than *b. Compares instead of subtracting, because the difference
// of two longs can overflow, and reads through const pointers instead of
// casting const away. It is static because a plain `inline` definition
// provides no external definition under C99 rules.
//
static inline long longcmp(const void * a, const void * b) {
  const long x = *(const long *) a;
  const long y = *(const long *) b;

  return (long) ((x > y) - (x < y));
}
46
+
47
+ // This version just calls the & consecutively for all arrays.
48
+ //
49
+ inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
50
+ // counters
51
+ long i, j;
52
+
53
+ // structs
54
+ struct RArray *rb_array_of_arrays;
55
+ struct RArray *smallest_array;
56
+ struct RArray *current_array;
57
+ VALUE hash;
58
+
59
+ // temps
60
+ VALUE v, vv;
61
+
62
+ // conversions
63
+ rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
64
+ smallest_array = RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
65
+
66
+ // iterate through all arrays
67
+ for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
68
+ // Break if the smallest array is empty
69
+ if (RARRAY_LEN(smallest_array) == 0) {
70
+ break;
71
+ }
72
+
73
+ // make a hash from the currently smallest version
74
+ hash = ary_make_hash(smallest_array, 0);
75
+ // clear for use as temp array
76
+ rb_ary_clear(smallest_array);
77
+
78
+ current_array = RARRAY_PTR(rb_array_of_arrays)[i];
79
+ // iterate through all array elements
80
+ for (j = 0; j < RARRAY_LEN(current_array); j++) {
81
+ v = vv = rb_ary_elt(current_array, j);
82
+ if (st_delete(RHASH_TBL(hash), (unsigned long*)&vv, 0)) {
83
+ rb_ary_push(smallest_array, v);
84
+ }
85
+ }
86
+ }
87
+
88
+ return smallest_array;
89
+ }
90
+
91
+ // Brute force algorithm to find the intersection of an array of length sorted, unsorted arrays.
92
+ // This algorithm can be faster than others for small arrays.
93
+ //
94
+ // inline VALUE brute_force_intersect(VALUE self, VALUE length_sorted_array_of_arrays) {
95
+ // // counters
96
+ // long i, j, k;
97
+ //
98
+ // // structs
99
+ // struct RArray *rb_array_of_arrays;
100
+ // struct RArray *candidate_answer_set;
101
+ // struct RArray *current_set;
102
+ //
103
+ // // conversions
104
+ // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
105
+ //
106
+ // // temps
107
+ // VALUE e;
108
+ // unsigned char found;
109
+ //
110
+ // // Let the smallest set s[0] be the candidate answer set
111
+ // // Note: Need a duplicate
112
+ // candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
113
+ //
114
+ // // For each entry in candidate answer set
115
+ // // Get current value
116
+ // for(i = 0; i < candidate_answer_set->len; i++) {
117
+ // e = candidate_answer_set->ptr[i];
118
+ //
119
+ // // Find the current value in other arrays
120
+ // // if not found, break
121
+ // for(j = 1; j < rb_array_of_arrays->len; j++) {
122
+ // current_set = RARRAY(rb_array_of_arrays->ptr[j]);
123
+ // found = 0;
124
+ //
125
+ // // Find with a linear search
126
+ // for(k = 0; k < current_set->len; k++) {
127
+ // if (e == current_set->ptr[k]) {
128
+ // found = 1;
129
+ // break;
130
+ // }
131
+ // }
132
+ //
133
+ // // break if not found
134
+ // if (!found) {
135
+ // break;
136
+ // }
137
+ // }
138
+ //
139
+ // // remove from candidate answer set if not found
140
+ // if (!found) {
141
+ // candidate_answer_set->ptr[i] = Qnil;
142
+ // }
143
+ // }
144
+ //
145
+ // // compact the candidate answer set
146
+ // // rb_ary_compact_bang(candidate_answer_set);
147
+ // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
148
+ //
149
+ // return candidate_answer_set;
150
+ // }
151
+
152
+ // inline VALUE intersect_unique(VALUE self, VALUE length_sorted_array_of_arrays) {
153
+ // // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
154
+ //
155
+ // // structs
156
+ // struct RArray *result;
157
+ // struct RArray *rb_array_of_arrays;
158
+ //
159
+ // // conversions
160
+ // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
161
+ //
162
+ // // TODO
163
+ //
164
+ // return result;
165
+ // }
166
+
167
+ // Generates the intersection of multiple arrays by sorting (not yet implemented).
168
+ //
169
+ // inline VALUE sorting_intersect_multiple(VALUE self, VALUE length_sorted_array_of_arrays) {
170
+ // // TODO
171
+ // }
172
+
173
+ // Generates the intersection of multiple length sorted, sorted arrays
174
+ //
175
+ // inline VALUE intersect_multiple_sorted(VALUE self, VALUE _length_sorted_array_of_arrays) {
176
+ // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
177
+ //
178
+ // // counters
179
+ // long i, j;
180
+ // long current_set_position, current_answer_set_position;
181
+ //
182
+ // // structs
183
+ // struct RArray *rb_array_of_arrays;
184
+ // struct RArray *candidate_answer_set;
185
+ // struct RArray *current_set;
186
+ //
187
+ // // temps
188
+ // long e;
189
+ //
190
+ // // conversions
191
+ // rb_array_of_arrays = RARRAY(length_sorted_array_of_arrays);
192
+ //
193
+ // // Let the smallest set s[0] be the candidate answer set
194
+ // // Note: Need a duplicate
195
+ // candidate_answer_set = RARRAY(rb_ary_dup(rb_array_of_arrays->ptr[0]));
196
+ //
197
+ // // For each set s[i], i = 1 .. k do
198
+ // for(i = 1; i < rb_array_of_arrays->len; i++) {
199
+ // current_set = RARRAY(rb_array_of_arrays->ptr[i]);
200
+ // current_set_position = 0;
201
+ //
202
+ // // for each element e in the candidate answer set
203
+ // for(j = 0; j < candidate_answer_set->len; j++) {
204
+ // e = candidate_answer_set->ptr[j];
205
+ //
206
+ // // search for e in the range l[i] to size(s[i])
207
+ // // and update l[i] to the last position probed in the previous step
208
+ // // if e was not found then
209
+ // if (bsearch(
210
+ // &e,
211
+ // &current_set->ptr[current_set_position],
212
+ // (current_set->len - current_set_position),
213
+ // sizeof(VALUE), //sizeof(current_set->ptr[0]),
214
+ // intcmp //longcmp
215
+ // ) == NULL) {
216
+ //
217
+ // // remove e from the candidate answer set
218
+ // // and advance e to the next element in the answer set
219
+ // // rb_ary_delete_at(candidate_answer_set, j);
220
+ // candidate_answer_set->ptr[j] = Qnil;
221
+ // }
222
+ // current_set_position = j - 1;
223
+ // }
224
+ //
225
+ // // compact the candidate answer set
226
+ // // rb_ary_compact_bang(candidate_answer_set);
227
+ // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
228
+ // }
229
+ //
230
+ // return candidate_answer_set;
231
+ // }
232
+
233
+ // Trying to make a custom version of Matz' ary &
234
+ //
235
+ // Differences:
236
+ // * Multiple arrays
237
+ // * No to_ary
238
+ // * Smallest array is used to make hash
239
+ // Note: Assumes that whatever is given in as array of arrays is sorted by array sizes.
240
+ //
241
+ // static VALUE rb_ary_and(ary1, ary2) VALUE ary1, ary2; {
242
+ // static VALUE intersect_multiple_with_hash(VALUE self, VALUE _length_sorted_array_of_arrays) {
243
+ // // VALUE hash, ary3, v, vv;
244
+ // // long i;
245
+ // //
246
+ // // ary2 = to_ary(ary2);
247
+ // // ary3 = rb_ary_new2(RARRAY(ary1)->len < RARRAY(ary2)->len ?
248
+ // // RARRAY(ary1)->len : RARRAY(ary2)->len);
249
+ // // hash = ary_make_hash(ary2, 0);
250
+ // //
251
+ // // for (i=0; i<RARRAY(ary1)->len; i++) {
252
+ // // v = vv = rb_ary_elt(ary1, i);
253
+ // // if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) {
254
+ // // rb_ary_push(ary3, v);
255
+ // // }
256
+ // // }
257
+ // //
258
+ // // return ary3;
259
+ // VALUE length_sorted_array_of_arrays = (_length_sorted_array_of_arrays);
260
+ //
261
+ // // structs
262
+ // struct RArray *candidate_answer_set;
263
+ // struct RArray *current_set;
264
+ //
265
+ // // temps
266
+ // VALUE hash, v, vv;
267
+ // long i, j, k;
268
+ //
269
+ // // Get smallest array size
270
+ // candidate_answer_set = rb_ary_new2((RARRAY(rb_array_of_arrays->ptr[0])->len);
271
+ //
272
+ // hash = ary_make_hash(RARRAY(rb_array_of_arrays->ptr[0]), 0);
273
+ //
274
+ // // For each entry in candidate answer set
275
+ // // Get current value
276
+ // for(i = 0; i < candidate_answer_set->len; i++) {
277
+ // // e = candidate_answer_set->ptr[i];
278
+ // v = vv = rb_ary_elt(candidate_answer_set, i);
279
+ //
280
+ // // Find the current value in other arrays
281
+ // // if not found, break
282
+ // for(j = 1; j < rb_array_of_arrays->len; j++) {
283
+ // current_set = RARRAY(rb_array_of_arrays->ptr[j]);
284
+ // found = 0;
285
+ //
286
+ // // Find with a linear search
287
+ // for(k = 0; k < current_set->len; k++) {
288
+ // // if (e == current_set->ptr[k]) {
289
+ // if (st_delete(RHASH(hash)->tbl, (unsigned long*)&vv, 0))
290
+ // found = 1;
291
+ // break;
292
+ // }
293
+ // }
294
+ //
295
+ // // break if not found
296
+ // if (!found) {
297
+ // break;
298
+ // }
299
+ // }
300
+ //
301
+ // // remove from candidate answer set if not found
302
+ // if (!found) {
303
+ // rb_ary_push(result, v);
304
+ // // candidate_answer_set->ptr[i] = Qnil;
305
+ // }
306
+ // }
307
+ //
308
+ // // compact the candidate answer set
309
+ // // rb_ary_compact_bang(candidate_answer_set);
310
+ // rb_funcall(candidate_answer_set, rb_intern("compact!"), 0);
311
+ //
312
+ // return candidate_answer_set;
313
+ // }
314
+
315
+ // VALUE rb_ary_clear_bang(ary) VALUE ary; {
316
+ // rb_ary_modify(ary);
317
+ // ARY_SET_LEN(ary, 0);
318
+ // // capa stays the same
319
+ // // if (ARY_DEFAULT_SIZE * 2 < RARRAY(ary)->aux.capa) {
320
+ // // REALLOC_N(RARRAY(ary)->ptr, VALUE, ARY_DEFAULT_SIZE * 2);
321
+ // // RARRAY(ary)->aux.capa = ARY_DEFAULT_SIZE * 2;
322
+ // // }
323
+ // return ary;
324
+ // }
325
+
326
+ VALUE p_mPerformant, p_cArray;
327
+
328
+ void Init_performant() {
329
+ p_mPerformant = rb_define_module("Performant");
330
+ p_cArray = rb_define_class_under(p_mPerformant, "Array", rb_cObject);
331
+ // p_cArray = rb_define_module_under(p_mPerformant, "Array");
332
+
333
+ // rb_define_method(rb_cArray, "clear!", rb_ary_clear_bang, 0);
334
+
335
+ rb_define_singleton_method(p_cArray, "memory_efficient_intersect", memory_efficient_intersect, 1);
336
+ // rb_define_singleton_method(p_cArray, "brute_force_intersect", brute_force_intersect, 1);
337
+ // rb_define_singleton_method(p_cArray, "intersect_multiple_sorted", intersect_multiple_sorted, 1);
338
+ // rb_define_singleton_method(p_cArray, "intersect_multiple_with_hash", intersect_multiple_sorted_with_hash, 1);
339
+ }