coupler 0.0.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. data/.document +5 -0
  2. data/.gitmodules +3 -0
  3. data/.rvmrc +1 -0
  4. data/.vimrc +40 -0
  5. data/Gemfile +27 -0
  6. data/Gemfile.lock +71 -0
  7. data/LICENSE +20 -0
  8. data/NOTES +6 -0
  9. data/README.rdoc +18 -0
  10. data/Rakefile +42 -0
  11. data/TODO +11 -0
  12. data/VERSION +1 -0
  13. data/bin/coupler +7 -0
  14. data/db/.gitignore +6 -0
  15. data/db/migrate/001_initial_schema.rb +166 -0
  16. data/db/migrate/002_stub.rb +4 -0
  17. data/db/migrate/003_stub.rb +4 -0
  18. data/db/migrate/004_create_comparisons.rb +28 -0
  19. data/db/migrate/005_move_database_name.rb +19 -0
  20. data/db/migrate/006_upgrade_comparisons.rb +34 -0
  21. data/db/migrate/007_add_which_to_comparisons.rb +23 -0
  22. data/db/migrate/008_add_result_field_to_transformations.rb +33 -0
  23. data/db/migrate/009_add_generated_flag_to_fields.rb +13 -0
  24. data/db/migrate/010_create_imports.rb +24 -0
  25. data/db/migrate/011_add_primary_key_type.rb +13 -0
  26. data/db/migrate/012_add_transformed_with_to_resources.rb +13 -0
  27. data/db/migrate/013_add_run_count_to_scenarios.rb +13 -0
  28. data/db/migrate/014_add_last_accessed_at_to_some_tables.rb +13 -0
  29. data/db/migrate/015_add_run_number_to_results.rb +15 -0
  30. data/db/migrate/016_fix_scenario_run_count.rb +27 -0
  31. data/db/migrate/017_rename_comparison_columns.rb +14 -0
  32. data/db/migrate/018_fix_scenario_linkage_type.rb +8 -0
  33. data/db/migrate/019_add_columns_to_imports.rb +24 -0
  34. data/db/migrate/020_rename_import_columns.rb +12 -0
  35. data/db/migrate/021_add_fields_to_connections.rb +15 -0
  36. data/db/migrate/022_remove_database_name_from_resources.rb +11 -0
  37. data/features/connections.feature +28 -0
  38. data/features/matchers.feature +35 -0
  39. data/features/projects.feature +11 -0
  40. data/features/resources.feature +62 -0
  41. data/features/scenarios.feature +45 -0
  42. data/features/step_definitions/coupler_steps.rb +145 -0
  43. data/features/step_definitions/matchers_steps.rb +26 -0
  44. data/features/step_definitions/resources_steps.rb +12 -0
  45. data/features/step_definitions/scenarios_steps.rb +7 -0
  46. data/features/step_definitions/transformations_steps.rb +3 -0
  47. data/features/support/env.rb +128 -0
  48. data/features/transformations.feature +22 -0
  49. data/features/wizard.feature +10 -0
  50. data/gfx/coupler-header.svg +213 -0
  51. data/gfx/coupler-sidebar.svg +656 -0
  52. data/gfx/coupler.svg +184 -0
  53. data/gfx/icon.svg +75 -0
  54. data/lib/coupler/base.rb +63 -0
  55. data/lib/coupler/config.rb +128 -0
  56. data/lib/coupler/data_uploader.rb +20 -0
  57. data/lib/coupler/database.rb +31 -0
  58. data/lib/coupler/extensions/connections.rb +57 -0
  59. data/lib/coupler/extensions/exceptions.rb +58 -0
  60. data/lib/coupler/extensions/imports.rb +43 -0
  61. data/lib/coupler/extensions/jobs.rb +21 -0
  62. data/lib/coupler/extensions/matchers.rb +64 -0
  63. data/lib/coupler/extensions/projects.rb +62 -0
  64. data/lib/coupler/extensions/resources.rb +89 -0
  65. data/lib/coupler/extensions/results.rb +100 -0
  66. data/lib/coupler/extensions/scenarios.rb +50 -0
  67. data/lib/coupler/extensions/transformations.rb +70 -0
  68. data/lib/coupler/extensions/transformers.rb +58 -0
  69. data/lib/coupler/extensions.rb +16 -0
  70. data/lib/coupler/helpers.rb +121 -0
  71. data/lib/coupler/import_buffer.rb +48 -0
  72. data/lib/coupler/logger.rb +16 -0
  73. data/lib/coupler/models/common_model.rb +104 -0
  74. data/lib/coupler/models/comparison.rb +166 -0
  75. data/lib/coupler/models/connection.rb +59 -0
  76. data/lib/coupler/models/field.rb +55 -0
  77. data/lib/coupler/models/import.rb +238 -0
  78. data/lib/coupler/models/job.rb +42 -0
  79. data/lib/coupler/models/jobify.rb +17 -0
  80. data/lib/coupler/models/matcher.rb +36 -0
  81. data/lib/coupler/models/project.rb +40 -0
  82. data/lib/coupler/models/resource.rb +287 -0
  83. data/lib/coupler/models/result.rb +92 -0
  84. data/lib/coupler/models/scenario/runner.rb +357 -0
  85. data/lib/coupler/models/scenario.rb +115 -0
  86. data/lib/coupler/models/transformation.rb +117 -0
  87. data/lib/coupler/models/transformer/runner.rb +28 -0
  88. data/lib/coupler/models/transformer.rb +110 -0
  89. data/lib/coupler/models.rb +30 -0
  90. data/lib/coupler/runner.rb +76 -0
  91. data/lib/coupler/scheduler.rb +56 -0
  92. data/lib/coupler.rb +34 -0
  93. data/log/.gitignore +1 -0
  94. data/misc/README +5 -0
  95. data/misc/jruby-json.license +57 -0
  96. data/misc/rack-flash.license +22 -0
  97. data/script/dbconsole.rb +5 -0
  98. data/src/edu/vanderbilt/coupler/Main.java +116 -0
  99. data/src/edu/vanderbilt/coupler/jruby.properties +1 -0
  100. data/tasks/annotations.rake +84 -0
  101. data/tasks/db.rake +120 -0
  102. data/tasks/environment.rake +12 -0
  103. data/tasks/jeweler.rake +43 -0
  104. data/tasks/package.rake +58 -0
  105. data/tasks/rdoc.rake +13 -0
  106. data/tasks/test.rake +63 -0
  107. data/tasks/vendor.rake +43 -0
  108. data/test/README.txt +6 -0
  109. data/test/config.yml +9 -0
  110. data/test/coupler/models/test_import.rb +221 -0
  111. data/test/factories.rb +91 -0
  112. data/test/fixtures/duplicate-keys.csv +5 -0
  113. data/test/fixtures/no-headers.csv +50 -0
  114. data/test/fixtures/people.csv +51 -0
  115. data/test/fixtures/varying-row-size.csv +4 -0
  116. data/test/helper.rb +156 -0
  117. data/test/integration/extensions/test_connections.rb +80 -0
  118. data/test/integration/extensions/test_imports.rb +94 -0
  119. data/test/integration/extensions/test_jobs.rb +52 -0
  120. data/test/integration/extensions/test_matchers.rb +134 -0
  121. data/test/integration/extensions/test_projects.rb +82 -0
  122. data/test/integration/extensions/test_resources.rb +150 -0
  123. data/test/integration/extensions/test_results.rb +89 -0
  124. data/test/integration/extensions/test_scenarios.rb +88 -0
  125. data/test/integration/extensions/test_transformations.rb +113 -0
  126. data/test/integration/extensions/test_transformers.rb +80 -0
  127. data/test/integration/test_field.rb +45 -0
  128. data/test/integration/test_import.rb +78 -0
  129. data/test/integration/test_running_scenarios.rb +379 -0
  130. data/test/integration/test_transformation.rb +56 -0
  131. data/test/integration/test_transforming.rb +154 -0
  132. data/test/table_sets.rb +76 -0
  133. data/test/unit/models/test_common_model.rb +130 -0
  134. data/test/unit/models/test_comparison.rb +619 -0
  135. data/test/unit/models/test_connection.rb +115 -0
  136. data/test/unit/models/test_field.rb +99 -0
  137. data/test/unit/models/test_import.rb +130 -0
  138. data/test/unit/models/test_job.rb +115 -0
  139. data/test/unit/models/test_matcher.rb +82 -0
  140. data/test/unit/models/test_project.rb +102 -0
  141. data/test/unit/models/test_resource.rb +564 -0
  142. data/test/unit/models/test_result.rb +90 -0
  143. data/test/unit/models/test_scenario.rb +199 -0
  144. data/test/unit/models/test_transformation.rb +193 -0
  145. data/test/unit/models/test_transformer.rb +188 -0
  146. data/test/unit/test_base.rb +60 -0
  147. data/test/unit/test_data_uploader.rb +27 -0
  148. data/test/unit/test_database.rb +23 -0
  149. data/test/unit/test_helpers.rb +58 -0
  150. data/test/unit/test_logger.rb +10 -0
  151. data/test/unit/test_models.rb +12 -0
  152. data/test/unit/test_runner.rb +76 -0
  153. data/test/unit/test_scheduler.rb +66 -0
  154. data/uploads/.gitignore +2 -0
  155. data/vendor/java/.gitignore +5 -0
  156. data/webroot/public/css/960.css +1 -0
  157. data/webroot/public/css/dataTables.css +1057 -0
  158. data/webroot/public/css/jquery-ui.css +572 -0
  159. data/webroot/public/css/jquery.treeview.css +68 -0
  160. data/webroot/public/css/reset.css +1 -0
  161. data/webroot/public/css/style.css +504 -0
  162. data/webroot/public/css/text.css +1 -0
  163. data/webroot/public/favicon.ico +0 -0
  164. data/webroot/public/images/12_col.gif +0 -0
  165. data/webroot/public/images/16_col.gif +0 -0
  166. data/webroot/public/images/add.png +0 -0
  167. data/webroot/public/images/ajax-loader.gif +0 -0
  168. data/webroot/public/images/cog.png +0 -0
  169. data/webroot/public/images/coupler.png +0 -0
  170. data/webroot/public/images/foo.png +0 -0
  171. data/webroot/public/images/hammer.png +0 -0
  172. data/webroot/public/images/header.png +0 -0
  173. data/webroot/public/images/home.gif +0 -0
  174. data/webroot/public/images/jobs.gif +0 -0
  175. data/webroot/public/images/sidebar-bottom.png +0 -0
  176. data/webroot/public/images/sidebar.png +0 -0
  177. data/webroot/public/images/treeview-default-line.gif +0 -0
  178. data/webroot/public/images/treeview-default.gif +0 -0
  179. data/webroot/public/images/ui-anim_basic_16x16.gif +0 -0
  180. data/webroot/public/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
  181. data/webroot/public/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
  182. data/webroot/public/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
  183. data/webroot/public/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
  184. data/webroot/public/images/ui-bg_glass_75_dadada_1x400.png +0 -0
  185. data/webroot/public/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  186. data/webroot/public/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  187. data/webroot/public/images/ui-bg_highlight-hard_30_565356_1x100.png +0 -0
  188. data/webroot/public/images/ui-bg_highlight-hard_75_888588_1x100.png +0 -0
  189. data/webroot/public/images/ui-bg_highlight-soft_30_6e3b3a_1x100.png +0 -0
  190. data/webroot/public/images/ui-bg_highlight-soft_35_8e8b8e_1x100.png +0 -0
  191. data/webroot/public/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  192. data/webroot/public/images/ui-icons_222222_256x240.png +0 -0
  193. data/webroot/public/images/ui-icons_2e83ff_256x240.png +0 -0
  194. data/webroot/public/images/ui-icons_454545_256x240.png +0 -0
  195. data/webroot/public/images/ui-icons_888888_256x240.png +0 -0
  196. data/webroot/public/images/ui-icons_cd0a0a_256x240.png +0 -0
  197. data/webroot/public/images/ui-icons_ffffff_256x240.png +0 -0
  198. data/webroot/public/js/ajaxupload.js +673 -0
  199. data/webroot/public/js/application.js +40 -0
  200. data/webroot/public/js/jquery-ui.combobox.js +98 -0
  201. data/webroot/public/js/jquery-ui.js +9867 -0
  202. data/webroot/public/js/jquery-ui.min.js +559 -0
  203. data/webroot/public/js/jquery.dataTables.min.js +587 -0
  204. data/webroot/public/js/jquery.min.js +154 -0
  205. data/webroot/public/js/jquery.timeago.js +140 -0
  206. data/webroot/public/js/jquery.tooltip.min.js +19 -0
  207. data/webroot/public/js/jquery.treeview.min.js +15 -0
  208. data/webroot/public/js/resource.js +11 -0
  209. data/webroot/public/js/results.js +56 -0
  210. data/webroot/public/js/transformations.js +95 -0
  211. data/webroot/views/connections/index.erb +5 -0
  212. data/webroot/views/connections/list.erb +34 -0
  213. data/webroot/views/connections/new.erb +55 -0
  214. data/webroot/views/connections/show.erb +36 -0
  215. data/webroot/views/imports/edit.erb +60 -0
  216. data/webroot/views/imports/form.erb +81 -0
  217. data/webroot/views/imports/new.erb +89 -0
  218. data/webroot/views/index.erb +12 -0
  219. data/webroot/views/jobs/index.erb +7 -0
  220. data/webroot/views/jobs/list.erb +24 -0
  221. data/webroot/views/layout.erb +38 -0
  222. data/webroot/views/matchers/form.erb +250 -0
  223. data/webroot/views/matchers/list.erb +32 -0
  224. data/webroot/views/projects/form.erb +14 -0
  225. data/webroot/views/projects/index.erb +96 -0
  226. data/webroot/views/projects/show.erb +24 -0
  227. data/webroot/views/resources/edit.erb +88 -0
  228. data/webroot/views/resources/index.erb +5 -0
  229. data/webroot/views/resources/list.erb +27 -0
  230. data/webroot/views/resources/new.erb +121 -0
  231. data/webroot/views/resources/show.erb +86 -0
  232. data/webroot/views/resources/transform.erb +2 -0
  233. data/webroot/views/results/csv.erb +12 -0
  234. data/webroot/views/results/details.erb +15 -0
  235. data/webroot/views/results/index.erb +2 -0
  236. data/webroot/views/results/list.erb +22 -0
  237. data/webroot/views/results/record.erb +24 -0
  238. data/webroot/views/results/show.erb +68 -0
  239. data/webroot/views/scenarios/index.erb +5 -0
  240. data/webroot/views/scenarios/list.erb +20 -0
  241. data/webroot/views/scenarios/new.erb +99 -0
  242. data/webroot/views/scenarios/run.erb +2 -0
  243. data/webroot/views/scenarios/show.erb +50 -0
  244. data/webroot/views/sidebar.erb +106 -0
  245. data/webroot/views/transformations/create.erb +115 -0
  246. data/webroot/views/transformations/for.erb +16 -0
  247. data/webroot/views/transformations/index.erb +2 -0
  248. data/webroot/views/transformations/list.erb +29 -0
  249. data/webroot/views/transformations/new.erb +126 -0
  250. data/webroot/views/transformations/preview.erb +46 -0
  251. data/webroot/views/transformers/edit.erb +6 -0
  252. data/webroot/views/transformers/form.erb +58 -0
  253. data/webroot/views/transformers/index.erb +2 -0
  254. data/webroot/views/transformers/list.erb +25 -0
  255. data/webroot/views/transformers/new.erb +5 -0
  256. data/webroot/views/transformers/preview.erb +23 -0
  257. data/webroot/views/transformers/show.erb +0 -0
  258. metadata +558 -0
@@ -0,0 +1,357 @@
1
+ module Coupler
2
+ module Models
3
+ class Scenario
4
+ class Runner
5
+ LIMIT = 10000
6
+
7
# Prepares one run of +parent+: captures its matcher and linkage type, picks
# the resources to scan, then derives the field pairs (setup_pairs) and
# (re)creates the tables for this run (create_tables).
#
# parent   - object responding to matcher, linkage_type, resource_1,
#            resources, run_count and local_database.
# progress - optional block; it is only stored in @progress here.
def initialize(parent, &progress)
  @parent   = parent
  @progress = progress
  @matcher  = parent.matcher
  @type     = parent.linkage_type

  # A cross-linkage scans the first resource twice, once per side.
  @resources =
    if @type == 'cross-linkage'
      Array.new(2) { parent.resource_1 }
    else
      parent.resources
    end

  @run_number   = @parent.run_count + 1
  @mutex        = Mutex.new   # guards @group_number (see get_next_group_id)
  @group_number = 0

  setup_pairs
  create_tables
end
23
+
24
# Executes the linkage inside the scenario's local database.
#
# Phase one groups records for each resource, one worker thread per resource
# (see phase_one_thread).  That is the only step for a self-linkage.  Any
# other linkage type (cross-matching on a single dataset is treated like a
# dual-linkage) needs a second pass that pairs groups from both sides and
# merges each pair into a newly numbered group.  Finally, per-resource record
# counts are written to the group rows.
def run!
  @parent.local_database do |db|
    @groups_dataset = db[@groups_table_name]
    @groups_buffer  = ImportBuffer.new(@groups_column_names, @groups_dataset)
    @join_dataset   = db[@join_table_name]
    @join_buffer    = ImportBuffer.new([:record_id, :which, :group_id], @join_dataset)

    # Phase 1: group records within each dataset.
    @pairs = @phase_one_pairs
    waiter = ThreadsWait.new
    source_databases = []
    @resources.each_with_index do |resource, index|
      source = resource.final_dataset
      source_databases << source.db
      key = resource.primary_key_sym
      side = (@type == 'self-linkage') ? nil : index
      waiter.join_nowait(phase_one_thread(source, key, side))
    end
    waiter.all_waits

    # Tidy up the connections opened for the source datasets.
    source_databases.each do |source_db|
      source_db.disconnect
      ::Sequel::DATABASES.delete(source_db)
    end

    unless @type == 'self-linkage'
      # Phase 2: find pairs of groups that belong together.
      group_pairs = db[@secondary_groups_table_name].order(:group_1_id, :group_2_id)
      @join_buffer = ImportBuffer.new([:group_1_id, :group_2_id], group_pairs)
      @pairs = @phase_two_pairs
      phase_two(@groups_dataset, :id)
      @join_buffer.flush

      # Update groups and groups_records, ten pairs at a time.
      waiter = ThreadsWait.new
      total = group_pairs.count
      # Every id up to this one belongs to a phase-one group.
      last_group_id = @group_number
      (0...total).step(10) do |offset|
        group_pairs.limit(10, offset).each do |row|
          merger = Thread.new(row[:group_1_id], row[:group_2_id]) do |group_1_id, group_2_id|
            merged_id = get_next_group_id
            @join_dataset.filter(:group_id => [group_1_id, group_2_id]).update(:group_id => merged_id)
            @groups_dataset.filter(:id => group_1_id).update(:id => merged_id)
            @groups_dataset.filter(:id => group_2_id).delete
          end
          merger.abort_on_exception = true
          waiter.join_nowait(merger)
        end
        waiter.all_waits
      end

      # Clean up groups and records that don't match
      @groups_dataset.filter(:id <= last_group_id).delete
      @join_dataset.filter(:group_id <= last_group_id).delete

      # Don't need the secondary table anymore
      db.drop_table(@secondary_groups_table_name)
    end

    # Calculate some summary stats
    @join_dataset.group_and_count(:group_id, :which).each do |row|
      count_column =
        if row[:which]
          :"resource_#{row[:which]+1}_count"
        else
          :"resource_1_count"
        end
      @groups_dataset.filter(:id => row[:group_id]).update(count_column => row[:count])
    end
  end
end
99
+
100
+ private
101
# Collects the field pairs of every non-blocking comparison of the matcher.
#
# Sets:
#   @field_pairs     - the field objects of each comparison
#   @phase_one_pairs - the same pairs as column-name symbols
#   @phase_two_pairs - [:pair_N, :pair_N] per comparison, or nil for a
#                      self-linkage (which has no second phase)
def setup_pairs
  @field_pairs = []
  @phase_one_pairs = []
  @phase_two_pairs = (@type == 'self-linkage') ? nil : []

  @matcher.comparisons.each do |comparison|
    next if comparison.blocking?

    pair = comparison.fields
    @field_pairs << pair
    @phase_one_pairs << pair.collect { |field| field.name_sym }
    next unless @phase_two_pairs

    column = :"pair_#{@phase_two_pairs.length}"
    @phase_two_pairs << [column, column]
  end
end
117
+
118
# Creates (replacing any existing ones) the tables for this run in the
# scenario's local database:
#
#   groups_<run>         - one row per group: id, one pair_N column per
#                          compared field pair, one resource_N_count column
#                          per resource
#   groups_records_<run> - record_id / which / group_id rows
#   groups_groups_<run>  - group_1_id / group_2_id; only created when the
#                          linkage is not a self-linkage
#
# Also sets @group_value_fields, @groups_column_names and the table names.
#
# Raises RuntimeError when the two fields of a pair have different column
# types, since a single pair_N column cannot hold both.
def create_tables
  # Yes, this could be done during setup_pairs, but I think
  # this is more clean. Also, there will only be very few pairs.
  groups_columns = [
    {:name => :id, :type => Integer, :primary_key => true}
  ]
  @group_value_fields = []
  @field_pairs.each_with_index do |(field_1, field_2), i|
    type_1 = field_1.local_column_options[:type]
    type_2 = field_2.local_column_options[:type]
    if type_1 != type_2
      # FIXME: comparing fields of different types is not supported yet.
      raise "Cannot compare fields #{field_1.name_sym} and #{field_2.name_sym}: " \
            "column types differ (#{type_1.inspect} vs. #{type_2.inspect})"
    end
    @group_value_fields.push([field_1.name_sym, field_2.name_sym])
    groups_columns.push({:name => :"pair_#{i}", :type => type_1})
  end

  # Captured before the count columns are appended below, so it lists only
  # :id and the pair_N columns.
  @groups_column_names = groups_columns.collect { |c| c[:name] }
  @groups_table_name = :"groups_#{@run_number}"

  # Add extra columns to the groups table for summary stats
  @resources.length.times do |i|
    groups_columns << {:name => :"resource_#{i+1}_count", :type => Integer}
  end

  # Use the shared primary key type when all resources agree, else String.
  key_types = @resources.collect { |r| r.primary_key_type }.uniq
  record_id_type = key_types.length == 1 ? key_types[0] : String
  @join_table_name = :"groups_records_#{@run_number}"

  @parent.local_database do |scenario_db|
    scenario_db.create_table!(@groups_table_name) do
      columns.push(*groups_columns)
    end
    scenario_db.create_table!(@join_table_name) do
      column :record_id, record_id_type
      Integer :which
      Integer :group_id, :index => true
      index [:group_id, :which, :record_id] # speedy filtering when showing results
    end
    if @type != 'self-linkage'
      # Need another groups table
      @secondary_groups_table_name = :"groups_groups_#{@run_number}"
      scenario_db.create_table!(@secondary_groups_table_name) do
        Integer :group_1_id
        Integer :group_2_id
      end
    end
  end
end
167
+
168
# Starts and returns the worker thread that groups the records of one
# dataset.
#
# dataset     - dataset to scan; narrowed to the primary key column plus
#               whatever each comparison's #apply adds, ordered by key last.
# primary_key - symbol of the primary key column.
# which       - nil for a self-linkage, otherwise the index of the side this
#               dataset is on.
#
# The rows are split into segments of LIMIT rows; each segment is scanned in
# its own thread, comparing every row with the one before it.  Afterwards the
# last row of each segment is compared with the first row of the next one so
# that groups spanning a segment boundary are joined.
#
# Returns the worker Thread (abort_on_exception is enabled on it).
def phase_one_thread(dataset, primary_key, which)
  # The segment threads below deliberately use a different variable name:
  # reusing this one inside the block would overwrite it and could make this
  # method return a segment thread instead of the worker.
  worker = Thread.new do
    # Apply filters and what not from comparisons
    dataset = dataset.select(primary_key)
    @matcher.comparisons.each do |comparison|
      dataset = comparison.apply(dataset, which)
    end
    dataset = dataset.order_more(primary_key)

    # Do the work
    local_tw = ThreadsWait.new
    segment_threads = []
    count = dataset.count
    segments = count / LIMIT
    segments += 1 if count % LIMIT > 0

    segments.times do |segment_num|
      ds = dataset.limit(LIMIT, LIMIT * segment_num)
      segment_thread = Thread.new do
        prev_row = nil
        group_id = nil

        ds.each_with_index do |row, row_num|
          if row_num > 0
            result = compare_rows(prev_row, row, which)
            if (result || which) && group_id.nil?
              # If `which` is not nil, that means we're in the first
              # stage of a dual or cross linkage. So, we should save
              # groups that only have 1 record in them.
              group_id = create_group(prev_row, which)
              @join_buffer.add([prev_row[primary_key], which, group_id])
            end

            # This stores the first record of this segment in order
            # to check it against the last record of the previous
            # segment. It has to happen before group_id is reset below,
            # otherwise the group just created for that record is lost.
            if segment_num > 0 && row_num == 1
              Thread.current[:head] = { :row => prev_row, :group_id => group_id }
            end

            if result
              @join_buffer.add([row[primary_key], which, group_id])
            else
              group_id = nil
            end
          end
          prev_row = row
        end
        if which && group_id.nil?
          # See above comment about `which`
          group_id = create_group(prev_row, which)
          @join_buffer.add([prev_row[primary_key], which, group_id])
        end

        # A segment with a single row never reaches row_num == 1, so its
        # only row is recorded as the head here.
        if segment_num > 0 && prev_row && Thread.current[:head].nil?
          Thread.current[:head] = { :row => prev_row, :group_id => group_id }
        end

        # This stores the last record of this segment in order to
        # check it against the first record of the next segment.
        if segment_num < (segments - 1)
          Thread.current[:tail] = { :row => prev_row, :group_id => group_id }
        end
      end
      segment_thread.abort_on_exception = true
      segment_threads << segment_thread
      local_tw.join_nowait(segment_thread)
      local_tw.next_wait if local_tw.threads.length == 10
    end
    local_tw.all_waits
    @groups_buffer.flush
    @join_buffer.flush

    # Compare heads and tails from segments
    if segments > 1
      merged = {}  # id of a group deleted below => id of the group it joined
      tail = nil
      segment_threads.each do |finished|
        head = finished[:head]
        # Compare the actual rows, not the {:row, :group_id} wrappers.
        if tail && head && compare_rows(tail[:row], head[:row], which)
          # The tail's group may itself have been merged away already.
          tail_group = merged.fetch(tail[:group_id], tail[:group_id])
          head_group = head[:group_id]
          if tail_group && head_group
            @join_dataset.filter({
              :group_id => head_group
            }).update(:group_id => tail_group)
            @groups_dataset.filter({
              :id => head_group
            }).delete
            merged[head_group] = tail_group
          elsif tail_group
            # Only the tail record is grouped: the head record joins it.
            @join_buffer.add([head[:row][primary_key], which, tail_group])
          elsif head_group
            # Only the head record is grouped: the tail record joins it.
            @join_buffer.add([tail[:row][primary_key], which, head_group])
          else
            # Neither record matched inside its own segment.
            new_group = create_group(tail[:row], which)
            @join_buffer.add([tail[:row][primary_key], which, new_group])
            @join_buffer.add([head[:row][primary_key], which, new_group])
          end
        end
        tail = finished[:tail]
      end
      @groups_buffer.flush
      @join_buffer.flush
    end
  end
  worker.abort_on_exception = true
  worker
end
260
+
261
# Second pass for non-self linkages: scans +dataset+ (ordered by the current
# @pairs columns, then by +primary_key+) in segments of LIMIT rows and adds
# a [previous key, current key] row to @join_buffer for each pair of adjacent
# rows that compare equal.  A row that has just been matched is not compared
# with the row after it, so matches are one-to-one.
#
# After the segment threads finish and the buffer is flushed, an unmatched
# last row of one segment is compared with an unmatched first row of the next
# segment; such matches are added to the buffer but not flushed here.
def phase_two(dataset, primary_key)
  # Add sorting only
  ordered = @pairs.inject(dataset) { |ds, fields| ds.order_more(*fields.uniq) }
  ordered = ordered.order_more(primary_key)

  local_tw = ThreadsWait.new
  workers = []
  total = ordered.count
  segments, leftover = total.divmod(LIMIT)
  segments += 1 if leftover > 0

  segments.times do |segment_num|
    slice = ordered.limit(LIMIT, LIMIT * segment_num)
    worker = Thread.new do
      previous = nil
      previous_matched = nil
      skip = false

      slice.each_with_index do |row, row_num|
        matched = nil
        if skip
          # This row was the second half of a match; don't compare it.
          skip = false
        elsif row_num > 0
          matched = compare_rows(previous, row)
          # There should only be one-to-one matches here.
          if matched
            @join_buffer.add([previous[primary_key], row[primary_key]])
            skip = true # Skip comparing this row to the next
          end

          # Remember an unmatched first row for the boundary check.
          if segment_num > 0 && row_num == 1 && !matched
            Thread.current[:head] = { :row => previous }
          end
        end
        previous = row
        previous_matched = matched
      end

      # Remember an unmatched last row for the boundary check.
      if segment_num < (segments - 1) && !previous_matched
        Thread.current[:tail] = { :row => previous }
      end
    end
    worker.abort_on_exception = true
    workers << worker
    local_tw.join_nowait(worker)
    local_tw.next_wait if local_tw.threads.length == 10
  end
  local_tw.all_waits
  @join_buffer.flush

  # Compare heads and tails from segments
  if workers.length > 1
    tail = nil
    workers.each do |finished|
      if tail
        head = finished[:head]
        if head && compare_rows(tail[:row], head[:row])
          @join_buffer.add([tail[:row][primary_key], head[:row][primary_key]])
        end
      end
      tail = finished[:tail]
    end
  end
end
325
+
326
# Compares two rows on every field pair in @pairs.
#
# With +which+ given, the same side of each pair is read from both rows;
# without it, the first field is read from row_1 and the second from row_2.
#
# Returns the array of matching values (possibly empty) when all pairs are
# equal, or nil as soon as one pair differs.
def compare_rows(row_1, row_2, which = nil)
  left_side = which || 0
  right_side = which || 1

  matched = []
  @pairs.each do |fields|
    left_value = row_1[fields[left_side]]
    right_value = row_2[fields[right_side]]
    return nil unless left_value == right_value
    matched.push(left_value)
  end
  matched
end
340
+
341
# Queues a new group row in @groups_buffer: a fresh group id followed by the
# row's value for each entry of @group_value_fields (the side given by
# +which+, or the first side when +which+ is nil).
#
# Returns the new group id.
def create_group(row, which)
  new_id = get_next_group_id
  side = which || 0
  values = @group_value_fields.collect { |fields| row[fields[side]] }
  @groups_buffer.add([new_id] + values)
  new_id
end
350
+
351
# Increments @group_number while holding @mutex and returns the new value.
def get_next_group_id
  @mutex.synchronize do
    @group_number = @group_number + 1
  end
end
354
+ end
355
+ end
356
+ end
357
+ end
@@ -0,0 +1,115 @@
1
+ module Coupler
2
+ module Models
3
+ class Scenario < Sequel::Model
4
# Raised by run! when the scenario's status is 'no_matcher'.
# Derives from StandardError so that a plain `rescue` catches it.
class NoMatcherError < StandardError; end
# Raised by run! when the scenario's status is 'resources_out_of_date'.
class ResourcesOutOfDateError < StandardError; end
6
+
7
+ include CommonModel
8
+ include Jobify
9
+
10
+ attr_writer :resource_ids
11
+ many_to_one :project
12
+ many_to_one :resource_1, :class => "Coupler::Models::Resource"
13
+ many_to_one :resource_2, :class => "Coupler::Models::Resource"
14
+ one_to_one :matcher
15
+ one_to_many :results
16
+
17
# Groups the model's rows by project_id and returns the result as a hash
# keyed by :project_id with the :count column as value.
def self.count_by_project
  counts = dataset.naked.group_and_count(:project_id)
  counts.to_hash(:project_id, :count)
end
20
+
21
# Returns "no_matcher" when there is no matcher, "resources_out_of_date"
# when any resource reports the status "out_of_date", and "ok" otherwise.
def status
  return "no_matcher" if matcher.nil?

  stale = resources.any? { |resource| resource.status == "out_of_date" }
  stale ? "resources_out_of_date" : "ok"
end
30
+
31
# Returns the scenario's resources as an array: empty without resource_1,
# otherwise resource_1 followed by resource_2 when that one is present.
def resources
  first = resource_1
  return [] unless first

  second = resource_2
  second ? [first, second] : [first]
end
38
+
39
# Connects to the scenario's own database through Sequel.connect, passing
# +block+ along.  The connection logs to Coupler::Logger and uses up to 50
# connections with a pool timeout of 60.
def local_database(&block)
  url = local_connection_string
  options = {
    :loggers => [Coupler::Logger.instance],
    :max_connections => 50,
    :pool_timeout => 60
  }
  Sequel.connect(url, options, &block)
end
46
+
47
# Runs the scenario: refuses to start unless the status allows it, lets a
# Runner do the work, then bumps run_count / last_run_at and saves a Result
# for the new run number.
#
# Raises NoMatcherError or ResourcesOutOfDateError depending on the status.
def run!(&progress)
  current = status
  raise NoMatcherError if current == 'no_matcher'
  raise ResourcesOutOfDateError if current == 'resources_out_of_date'

  Runner.new(self, &progress).run!

  update(:run_count => run_count + 1, :last_run_at => Time.now)
  Result.new(:scenario => self, :run_number => run_count).save
end
60
+
61
# Derives linkage_type from the resources and the matcher:
#   no resource_1                  -> "N/A"
#   resource_1 and resource_2      -> "dual-linkage"
#   one resource, cross-matching   -> "cross-linkage"
#   one resource otherwise         -> "self-linkage"
def set_linkage_type
  self.linkage_type =
    if !resource_1
      "N/A"
    elsif resource_2
      "dual-linkage"
    elsif matcher && matcher.cross_match?
      "cross-linkage"
    else
      "self-linkage"
    end
end
74
+
75
+ private
76
+
77
# Asks Base for the connection string of the database named after this
# scenario's id ("scenario_<id>").
def local_connection_string
  database_name = "scenario_#{id}"
  Base.connection_string(database_name)
end
80
+
81
# When resource ids were assigned through resource_ids=, resolves the first
# two of them against the project's resources and stores them as
# resource_1_id / resource_2_id.
#
# The ids are assigned in the order they were requested; the order of the
# rows returned by the query is not relied upon.  Ids that do not belong to
# the project are dropped, and a repeated id is used only once.
def before_validation
  super
  if @resource_ids.is_a?(Array)
    requested = @resource_ids[0..1].compact
    found = project.resources_dataset.filter(:id => requested).all

    # Compare as strings: the requested ids may not have the key's type.
    by_id = {}
    found.each { |resource| by_id[resource.id.to_s] = resource }
    ordered = requested.collect { |rid| rid.to_s }.uniq.collect { |key| by_id[key] }.compact

    self.resource_1_id = ordered[0].nil? ? nil : ordered[0].id
    self.resource_2_id = ordered[1].nil? ? nil : ordered[1].id
  end
end
89
+
90
# A scenario needs a name that is unique within its project, and at least
# one resource.
def validate
  super
  validates_presence :name
  validates_unique [:name, :project_id]
  errors.add(:base, "At least one resource is required") if resource_1_id.nil?
end
98
+
99
# Fills in a slug derived from the name (lower-cased, runs of whitespace
# replaced by "_") unless one is already set, then sets the linkage type.
def before_create
  super
  self.slug = name.downcase.gsub(/\s+/, "_") unless slug
  set_linkage_type
end
104
+
105
+ # There is currently no plan to let a user change resources
106
+ # after a scenario has been created.
107
+ #def before_update
108
+ #super
109
+ #set_linkage_type
110
+ #end
111
+ end
112
+ end
113
+ end
114
+
115
+ require File.join(File.dirname(__FILE__), 'scenario', 'runner')
@@ -0,0 +1,117 @@
1
+ module Coupler
2
+ module Models
3
+ class Transformation < Sequel::Model
4
+ include CommonModel
5
+ many_to_one :resource
6
+ many_to_one :source_field, :class => Field
7
+ many_to_one :result_field, :class => Field
8
+ many_to_one :transformer
9
+
10
+ plugin :nested_attributes
11
+ nested_attributes :result_field
12
+ nested_attributes :transformer, :destroy => false
13
+
14
# Hands +data+ to the transformer, telling it to read from the source
# field's column (:in) and write to the result field's column (:out).
def transform(data)
  worker = transformer
  columns = {
    :in => source_field.name.to_sym,
    :out => result_field.name.to_sym
  }
  worker.transform(data, columns)
end
20
+
21
# Returns what the transformer reports as field changes for the source field.
def field_changes
  worker = transformer
  worker.field_changes(source_field)
end
24
+
25
+ # NOTE: The fact that the aliased name doesn't have an = at the end is
26
+ # important. Ruby methods with names that have = at the end always
27
+ # return the RHS value, regardless of what the method actually returns.
28
+ # The only way to grab the associated object that gets created from the
29
+ # nested attributes methods is by fetching the return value.
30
+ #
31
# Keep the original writer reachable under a name without a trailing "=",
# so that its return value can be captured.
alias_method :original_result_field_attributes, :result_field_attributes=

# Marks the nested result field as generated and remembers the object
# returned by the original writer in @staged_result_field.
def result_field_attributes=(h)
  flagged = h.merge(:is_generated => true)
  @staged_result_field = original_result_field_attributes(flagged)
end
37
+
38
# A transformation can be deleted when it is the last one of its resource
# and its result field is either absent, not generated, or not used by any
# scenario.
#
# Positions are assigned per resource (see before_create), so the highest
# position is looked up among this resource's transformations only.
def deletable?
  last_position = self.class.filter(:resource_id => resource_id).max(:position)
  position == last_position &&
    (result_field.nil? || !result_field.is_generated || result_field.scenarios_dataset.count == 0)
end
42
+
43
+ private
44
# Completes the result field before validation:
#
# * without an explicit or staged result field, the transformation writes
#   back to its source field;
# * a staged result field receives the first set of attributes the
#   transformer reports for the source field (or the source field's own
#   :type / :db_type when that set is empty) plus the resource id.
def before_validation
  super
  staged = @staged_result_field

  if source_field_id && !result_field_id && !staged
    self.result_field_id = source_field_id
  end

  return unless staged && transformer && source_field && resource_id

  attributes = transformer.field_changes(source_field).values[0]
  if attributes.empty?
    attributes.update(:type => source_field[:type], :db_type => source_field[:db_type])
  end
  attributes[:resource_id] = resource_id
  staged.set(attributes)
end
62
+
63
# Requires a resource, a source field and a transformer.  When those basic
# checks pass, the source field must belong to the resource and have a type
# the transformer allows, and (unless a result field is staged) the result
# field must belong to the resource as well.
def validate
  super
  validates_presence [:resource_id, :source_field_id]
  errors.add(:transformer_id, "is not present") if transformer.nil?
  return unless errors.empty?

  input = resource.fields_dataset[:id => source_field_id]
  if input.nil?
    errors.add(:source_field_id, "is invalid")
  elsif transformer.allowed_types.is_a?(Array) && !transformer.allowed_types.include?(input.final_type)
    errors.add(:base, "#{transformer.name} cannot transform type '#{input.final_type}'")
  end

  return if @staged_result_field

  output = result_field_id ? resource.fields_dataset[:id => result_field_id] : nil
  errors.add(:result_field_id, "is invalid") if output.nil?
end
87
+
88
# Unless a position is already set, puts the transformation after the
# existing transformations of the same resource.
def before_create
  super
  current = position
  return current if current

  self.position = self.class.filter(:resource_id => resource_id).count + 1
end
92
+
93
# Has the owning resource refresh its fields after every save.
def after_save
  super
  owner = resource
  owner.refresh_fields!
end
97
+
98
# Prevent all but the last transformation from being destroyed.
#
# Returns false (which cancels the destroy) when an inherited hook returned
# false or when the transformation is not deletable.
#
# TODO: I'm putting off programming the complex logic required to
# enable deletion from the middle of a transformation stack.
def before_destroy
  # Don't let our own answer override a hook chain that was already halted.
  return false if super == false
  deletable?
end
107
+
108
# Destroys a generated result field once no transformation refers to it any
# more, then has the resource (if any) refresh its fields.
def after_destroy
  super
  generated = result_field
  if generated && generated.is_generated
    remaining = self.class.filter(:result_field_id => generated.id).count
    generated.destroy if remaining == 0
  end
  resource.refresh_fields! if resource
end
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,28 @@
1
module Coupler
  module Models
    class Transformer
      # Evaluates a snippet of transformer code against a single input value.
      #
      # Nearly all public methods are removed from the class so that the
      # snippet can only reach `value` (plus what Ruby itself requires).
      #
      # NOTE(review): `run` instance_evals the code string it was given; this
      # is not a sandbox and must not be fed untrusted code.
      class Runner
        # Keep the __*__ methods and instance_eval (needed by #run), and
        # object_id (Ruby warns when it is undefined).
        instance_methods.each do |m|
          undef_method m unless m =~ /^__|^instance_eval$|^object_id$/
        end

        # code  - String of Ruby code to evaluate.
        # input - the value the code sees as `value`.
        def initialize(code, input)
          @input = input
          @code = code
        end

        # Evaluates the code in the context of this object and returns its
        # result.  Exceptions raised by the code propagate to the caller.
        def run
          instance_eval(@code, __FILE__, __LINE__)
        end

        # The input value, as seen by the evaluated code.
        def value
          @input
        end

        # Any other method the code calls is rejected.  Arguments and a block
        # are accepted so that such calls fail with NoMethodError rather than
        # with an ArgumentError raised by this method's own signature.
        def method_missing(name, *args, &block)
          raise NoMethodError, "undefined method `#{name}' for transformer code"
        end
      end
    end
  end
end