coupler 0.0.1-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (258) hide show
  1. data/.document +5 -0
  2. data/.gitmodules +3 -0
  3. data/.rvmrc +1 -0
  4. data/.vimrc +40 -0
  5. data/Gemfile +27 -0
  6. data/Gemfile.lock +71 -0
  7. data/LICENSE +20 -0
  8. data/NOTES +6 -0
  9. data/README.rdoc +18 -0
  10. data/Rakefile +42 -0
  11. data/TODO +11 -0
  12. data/VERSION +1 -0
  13. data/bin/coupler +7 -0
  14. data/db/.gitignore +6 -0
  15. data/db/migrate/001_initial_schema.rb +166 -0
  16. data/db/migrate/002_stub.rb +4 -0
  17. data/db/migrate/003_stub.rb +4 -0
  18. data/db/migrate/004_create_comparisons.rb +28 -0
  19. data/db/migrate/005_move_database_name.rb +19 -0
  20. data/db/migrate/006_upgrade_comparisons.rb +34 -0
  21. data/db/migrate/007_add_which_to_comparisons.rb +23 -0
  22. data/db/migrate/008_add_result_field_to_transformations.rb +33 -0
  23. data/db/migrate/009_add_generated_flag_to_fields.rb +13 -0
  24. data/db/migrate/010_create_imports.rb +24 -0
  25. data/db/migrate/011_add_primary_key_type.rb +13 -0
  26. data/db/migrate/012_add_transformed_with_to_resources.rb +13 -0
  27. data/db/migrate/013_add_run_count_to_scenarios.rb +13 -0
  28. data/db/migrate/014_add_last_accessed_at_to_some_tables.rb +13 -0
  29. data/db/migrate/015_add_run_number_to_results.rb +15 -0
  30. data/db/migrate/016_fix_scenario_run_count.rb +27 -0
  31. data/db/migrate/017_rename_comparison_columns.rb +14 -0
  32. data/db/migrate/018_fix_scenario_linkage_type.rb +8 -0
  33. data/db/migrate/019_add_columns_to_imports.rb +24 -0
  34. data/db/migrate/020_rename_import_columns.rb +12 -0
  35. data/db/migrate/021_add_fields_to_connections.rb +15 -0
  36. data/db/migrate/022_remove_database_name_from_resources.rb +11 -0
  37. data/features/connections.feature +28 -0
  38. data/features/matchers.feature +35 -0
  39. data/features/projects.feature +11 -0
  40. data/features/resources.feature +62 -0
  41. data/features/scenarios.feature +45 -0
  42. data/features/step_definitions/coupler_steps.rb +145 -0
  43. data/features/step_definitions/matchers_steps.rb +26 -0
  44. data/features/step_definitions/resources_steps.rb +12 -0
  45. data/features/step_definitions/scenarios_steps.rb +7 -0
  46. data/features/step_definitions/transformations_steps.rb +3 -0
  47. data/features/support/env.rb +128 -0
  48. data/features/transformations.feature +22 -0
  49. data/features/wizard.feature +10 -0
  50. data/gfx/coupler-header.svg +213 -0
  51. data/gfx/coupler-sidebar.svg +656 -0
  52. data/gfx/coupler.svg +184 -0
  53. data/gfx/icon.svg +75 -0
  54. data/lib/coupler/base.rb +63 -0
  55. data/lib/coupler/config.rb +128 -0
  56. data/lib/coupler/data_uploader.rb +20 -0
  57. data/lib/coupler/database.rb +31 -0
  58. data/lib/coupler/extensions/connections.rb +57 -0
  59. data/lib/coupler/extensions/exceptions.rb +58 -0
  60. data/lib/coupler/extensions/imports.rb +43 -0
  61. data/lib/coupler/extensions/jobs.rb +21 -0
  62. data/lib/coupler/extensions/matchers.rb +64 -0
  63. data/lib/coupler/extensions/projects.rb +62 -0
  64. data/lib/coupler/extensions/resources.rb +89 -0
  65. data/lib/coupler/extensions/results.rb +100 -0
  66. data/lib/coupler/extensions/scenarios.rb +50 -0
  67. data/lib/coupler/extensions/transformations.rb +70 -0
  68. data/lib/coupler/extensions/transformers.rb +58 -0
  69. data/lib/coupler/extensions.rb +16 -0
  70. data/lib/coupler/helpers.rb +121 -0
  71. data/lib/coupler/import_buffer.rb +48 -0
  72. data/lib/coupler/logger.rb +16 -0
  73. data/lib/coupler/models/common_model.rb +104 -0
  74. data/lib/coupler/models/comparison.rb +166 -0
  75. data/lib/coupler/models/connection.rb +59 -0
  76. data/lib/coupler/models/field.rb +55 -0
  77. data/lib/coupler/models/import.rb +238 -0
  78. data/lib/coupler/models/job.rb +42 -0
  79. data/lib/coupler/models/jobify.rb +17 -0
  80. data/lib/coupler/models/matcher.rb +36 -0
  81. data/lib/coupler/models/project.rb +40 -0
  82. data/lib/coupler/models/resource.rb +287 -0
  83. data/lib/coupler/models/result.rb +92 -0
  84. data/lib/coupler/models/scenario/runner.rb +357 -0
  85. data/lib/coupler/models/scenario.rb +115 -0
  86. data/lib/coupler/models/transformation.rb +117 -0
  87. data/lib/coupler/models/transformer/runner.rb +28 -0
  88. data/lib/coupler/models/transformer.rb +110 -0
  89. data/lib/coupler/models.rb +30 -0
  90. data/lib/coupler/runner.rb +76 -0
  91. data/lib/coupler/scheduler.rb +56 -0
  92. data/lib/coupler.rb +34 -0
  93. data/log/.gitignore +1 -0
  94. data/misc/README +5 -0
  95. data/misc/jruby-json.license +57 -0
  96. data/misc/rack-flash.license +22 -0
  97. data/script/dbconsole.rb +5 -0
  98. data/src/edu/vanderbilt/coupler/Main.java +116 -0
  99. data/src/edu/vanderbilt/coupler/jruby.properties +1 -0
  100. data/tasks/annotations.rake +84 -0
  101. data/tasks/db.rake +120 -0
  102. data/tasks/environment.rake +12 -0
  103. data/tasks/jeweler.rake +43 -0
  104. data/tasks/package.rake +58 -0
  105. data/tasks/rdoc.rake +13 -0
  106. data/tasks/test.rake +63 -0
  107. data/tasks/vendor.rake +43 -0
  108. data/test/README.txt +6 -0
  109. data/test/config.yml +9 -0
  110. data/test/coupler/models/test_import.rb +221 -0
  111. data/test/factories.rb +91 -0
  112. data/test/fixtures/duplicate-keys.csv +5 -0
  113. data/test/fixtures/no-headers.csv +50 -0
  114. data/test/fixtures/people.csv +51 -0
  115. data/test/fixtures/varying-row-size.csv +4 -0
  116. data/test/helper.rb +156 -0
  117. data/test/integration/extensions/test_connections.rb +80 -0
  118. data/test/integration/extensions/test_imports.rb +94 -0
  119. data/test/integration/extensions/test_jobs.rb +52 -0
  120. data/test/integration/extensions/test_matchers.rb +134 -0
  121. data/test/integration/extensions/test_projects.rb +82 -0
  122. data/test/integration/extensions/test_resources.rb +150 -0
  123. data/test/integration/extensions/test_results.rb +89 -0
  124. data/test/integration/extensions/test_scenarios.rb +88 -0
  125. data/test/integration/extensions/test_transformations.rb +113 -0
  126. data/test/integration/extensions/test_transformers.rb +80 -0
  127. data/test/integration/test_field.rb +45 -0
  128. data/test/integration/test_import.rb +78 -0
  129. data/test/integration/test_running_scenarios.rb +379 -0
  130. data/test/integration/test_transformation.rb +56 -0
  131. data/test/integration/test_transforming.rb +154 -0
  132. data/test/table_sets.rb +76 -0
  133. data/test/unit/models/test_common_model.rb +130 -0
  134. data/test/unit/models/test_comparison.rb +619 -0
  135. data/test/unit/models/test_connection.rb +115 -0
  136. data/test/unit/models/test_field.rb +99 -0
  137. data/test/unit/models/test_import.rb +130 -0
  138. data/test/unit/models/test_job.rb +115 -0
  139. data/test/unit/models/test_matcher.rb +82 -0
  140. data/test/unit/models/test_project.rb +102 -0
  141. data/test/unit/models/test_resource.rb +564 -0
  142. data/test/unit/models/test_result.rb +90 -0
  143. data/test/unit/models/test_scenario.rb +199 -0
  144. data/test/unit/models/test_transformation.rb +193 -0
  145. data/test/unit/models/test_transformer.rb +188 -0
  146. data/test/unit/test_base.rb +60 -0
  147. data/test/unit/test_data_uploader.rb +27 -0
  148. data/test/unit/test_database.rb +23 -0
  149. data/test/unit/test_helpers.rb +58 -0
  150. data/test/unit/test_logger.rb +10 -0
  151. data/test/unit/test_models.rb +12 -0
  152. data/test/unit/test_runner.rb +76 -0
  153. data/test/unit/test_scheduler.rb +66 -0
  154. data/uploads/.gitignore +2 -0
  155. data/vendor/java/.gitignore +5 -0
  156. data/webroot/public/css/960.css +1 -0
  157. data/webroot/public/css/dataTables.css +1057 -0
  158. data/webroot/public/css/jquery-ui.css +572 -0
  159. data/webroot/public/css/jquery.treeview.css +68 -0
  160. data/webroot/public/css/reset.css +1 -0
  161. data/webroot/public/css/style.css +504 -0
  162. data/webroot/public/css/text.css +1 -0
  163. data/webroot/public/favicon.ico +0 -0
  164. data/webroot/public/images/12_col.gif +0 -0
  165. data/webroot/public/images/16_col.gif +0 -0
  166. data/webroot/public/images/add.png +0 -0
  167. data/webroot/public/images/ajax-loader.gif +0 -0
  168. data/webroot/public/images/cog.png +0 -0
  169. data/webroot/public/images/coupler.png +0 -0
  170. data/webroot/public/images/foo.png +0 -0
  171. data/webroot/public/images/hammer.png +0 -0
  172. data/webroot/public/images/header.png +0 -0
  173. data/webroot/public/images/home.gif +0 -0
  174. data/webroot/public/images/jobs.gif +0 -0
  175. data/webroot/public/images/sidebar-bottom.png +0 -0
  176. data/webroot/public/images/sidebar.png +0 -0
  177. data/webroot/public/images/treeview-default-line.gif +0 -0
  178. data/webroot/public/images/treeview-default.gif +0 -0
  179. data/webroot/public/images/ui-anim_basic_16x16.gif +0 -0
  180. data/webroot/public/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
  181. data/webroot/public/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
  182. data/webroot/public/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
  183. data/webroot/public/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
  184. data/webroot/public/images/ui-bg_glass_75_dadada_1x400.png +0 -0
  185. data/webroot/public/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  186. data/webroot/public/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  187. data/webroot/public/images/ui-bg_highlight-hard_30_565356_1x100.png +0 -0
  188. data/webroot/public/images/ui-bg_highlight-hard_75_888588_1x100.png +0 -0
  189. data/webroot/public/images/ui-bg_highlight-soft_30_6e3b3a_1x100.png +0 -0
  190. data/webroot/public/images/ui-bg_highlight-soft_35_8e8b8e_1x100.png +0 -0
  191. data/webroot/public/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  192. data/webroot/public/images/ui-icons_222222_256x240.png +0 -0
  193. data/webroot/public/images/ui-icons_2e83ff_256x240.png +0 -0
  194. data/webroot/public/images/ui-icons_454545_256x240.png +0 -0
  195. data/webroot/public/images/ui-icons_888888_256x240.png +0 -0
  196. data/webroot/public/images/ui-icons_cd0a0a_256x240.png +0 -0
  197. data/webroot/public/images/ui-icons_ffffff_256x240.png +0 -0
  198. data/webroot/public/js/ajaxupload.js +673 -0
  199. data/webroot/public/js/application.js +40 -0
  200. data/webroot/public/js/jquery-ui.combobox.js +98 -0
  201. data/webroot/public/js/jquery-ui.js +9867 -0
  202. data/webroot/public/js/jquery-ui.min.js +559 -0
  203. data/webroot/public/js/jquery.dataTables.min.js +587 -0
  204. data/webroot/public/js/jquery.min.js +154 -0
  205. data/webroot/public/js/jquery.timeago.js +140 -0
  206. data/webroot/public/js/jquery.tooltip.min.js +19 -0
  207. data/webroot/public/js/jquery.treeview.min.js +15 -0
  208. data/webroot/public/js/resource.js +11 -0
  209. data/webroot/public/js/results.js +56 -0
  210. data/webroot/public/js/transformations.js +95 -0
  211. data/webroot/views/connections/index.erb +5 -0
  212. data/webroot/views/connections/list.erb +34 -0
  213. data/webroot/views/connections/new.erb +55 -0
  214. data/webroot/views/connections/show.erb +36 -0
  215. data/webroot/views/imports/edit.erb +60 -0
  216. data/webroot/views/imports/form.erb +81 -0
  217. data/webroot/views/imports/new.erb +89 -0
  218. data/webroot/views/index.erb +12 -0
  219. data/webroot/views/jobs/index.erb +7 -0
  220. data/webroot/views/jobs/list.erb +24 -0
  221. data/webroot/views/layout.erb +38 -0
  222. data/webroot/views/matchers/form.erb +250 -0
  223. data/webroot/views/matchers/list.erb +32 -0
  224. data/webroot/views/projects/form.erb +14 -0
  225. data/webroot/views/projects/index.erb +96 -0
  226. data/webroot/views/projects/show.erb +24 -0
  227. data/webroot/views/resources/edit.erb +88 -0
  228. data/webroot/views/resources/index.erb +5 -0
  229. data/webroot/views/resources/list.erb +27 -0
  230. data/webroot/views/resources/new.erb +121 -0
  231. data/webroot/views/resources/show.erb +86 -0
  232. data/webroot/views/resources/transform.erb +2 -0
  233. data/webroot/views/results/csv.erb +12 -0
  234. data/webroot/views/results/details.erb +15 -0
  235. data/webroot/views/results/index.erb +2 -0
  236. data/webroot/views/results/list.erb +22 -0
  237. data/webroot/views/results/record.erb +24 -0
  238. data/webroot/views/results/show.erb +68 -0
  239. data/webroot/views/scenarios/index.erb +5 -0
  240. data/webroot/views/scenarios/list.erb +20 -0
  241. data/webroot/views/scenarios/new.erb +99 -0
  242. data/webroot/views/scenarios/run.erb +2 -0
  243. data/webroot/views/scenarios/show.erb +50 -0
  244. data/webroot/views/sidebar.erb +106 -0
  245. data/webroot/views/transformations/create.erb +115 -0
  246. data/webroot/views/transformations/for.erb +16 -0
  247. data/webroot/views/transformations/index.erb +2 -0
  248. data/webroot/views/transformations/list.erb +29 -0
  249. data/webroot/views/transformations/new.erb +126 -0
  250. data/webroot/views/transformations/preview.erb +46 -0
  251. data/webroot/views/transformers/edit.erb +6 -0
  252. data/webroot/views/transformers/form.erb +58 -0
  253. data/webroot/views/transformers/index.erb +2 -0
  254. data/webroot/views/transformers/list.erb +25 -0
  255. data/webroot/views/transformers/new.erb +5 -0
  256. data/webroot/views/transformers/preview.erb +23 -0
  257. data/webroot/views/transformers/show.erb +0 -0
  258. metadata +558 -0
@@ -0,0 +1,357 @@
1
module Coupler
  module Models
    class Scenario
      # Performs one run of a scenario: records from the scenario's
      # resource(s) are sorted on the matcher's non-blocking comparison
      # fields and adjacent records with equal values are placed in the
      # same group. Results are written to per-run tables
      # (groups_N and groups_records_N) in the scenario's local database.
      class Runner
        # Number of rows handled by one segment thread.
        LIMIT = 10000

        # parent   - the Scenario being run; must respond to matcher,
        #            linkage_type, resource_1, resources, run_count and
        #            local_database.
        # progress - optional block; it is stored but not called anywhere
        #            in this class.
        #
        # Creates the result tables for this run as a side effect.
        def initialize(parent, &progress)
          @parent = parent
          @progress = progress
          @matcher = parent.matcher
          @type = parent.linkage_type
          if @type == 'cross-linkage'
            # Cross-matching links one resource against itself, so the same
            # resource is used for both sides.
            @resources = [parent.resource_1, parent.resource_1]
          else
            @resources = parent.resources
          end
          @run_number = @parent.run_count + 1
          @mutex = Mutex.new
          @group_number = 0
          setup_pairs
          create_tables
        end

        # Runs the linkage and fills the groups and groups_records tables,
        # then stores per-group record counts in the groups table.
        def run!
          @parent.local_database do |scenario_db|
            @groups_dataset = scenario_db[@groups_table_name]
            @groups_buffer = ImportBuffer.new(@groups_column_names, @groups_dataset)
            @join_dataset = scenario_db[@join_table_name]
            @join_buffer = ImportBuffer.new([:record_id, :which, :group_id], @join_dataset)

            # Group records for each dataset.  This step is the same for both
            # self-linkage and dual-linkage.  However, this is the only step
            # for self-linkage.  Dual-linkage requires another pass after
            # this.
            #
            # Cross-matching on a single dataset is treated as a dual-linkage.
            #
            @pairs = @phase_one_pairs
            tw = ThreadsWait.new
            databases_to_close = []
            @resources.each_with_index do |resource, i|
              dataset = resource.final_dataset
              databases_to_close << dataset.db
              primary_key = resource.primary_key_sym
              # `which` is nil for self-linkage; otherwise it is the index of
              # the resource (0 or 1) the records come from.
              which = @type == 'self-linkage' ? nil : i
              resource_thread = phase_one_thread(dataset, primary_key, which)
              tw.join_nowait(resource_thread)
            end
            tw.all_waits
            databases_to_close.each do |db| # tidy up
              db.disconnect
              ::Sequel::DATABASES.delete(db)
            end

            if @type != 'self-linkage'
              # Phase 2!
              secondary_groups_ds = scenario_db[@secondary_groups_table_name].order(:group_1_id, :group_2_id)
              @join_buffer = ImportBuffer.new([:group_1_id, :group_2_id], secondary_groups_ds)
              @pairs = @phase_two_pairs
              phase_two(@groups_dataset, :id)
              @join_buffer.flush

              # Update groups and groups_records
              tw = ThreadsWait.new
              count = secondary_groups_ds.count
              offset = 0
              # Every group created during phase one has an id <= this value;
              # matched pairs receive fresh ids above it.
              last_group_id = @group_number
              while offset < count
                dataset = secondary_groups_ds.limit(10, offset)
                offset += 10
                dataset.each do |row|
                  thread = Thread.new(row[:group_1_id], row[:group_2_id]) do |group_1_id, group_2_id|
                    new_group_id = get_next_group_id
                    @join_dataset.filter(:group_id => [group_1_id, group_2_id]).update(:group_id => new_group_id)
                    @groups_dataset.filter(:id => group_1_id).update(:id => new_group_id)
                    @groups_dataset.filter(:id => group_2_id).delete
                  end
                  thread.abort_on_exception = true
                  tw.join_nowait(thread)
                end
                tw.all_waits
              end

              # Clean up groups and records that don't match
              @groups_dataset.filter(:id <= last_group_id).delete
              @join_dataset.filter(:group_id <= last_group_id).delete

              # Don't need the secondary table anymore
              scenario_db.drop_table(@secondary_groups_table_name)
            end

            # Calculate some summary stats
            @join_dataset.group_and_count(:group_id, :which).each do |row|
              col = row[:which] ? :"resource_#{row[:which]+1}_count" : :"resource_1_count"
              @groups_dataset.filter(:id => row[:group_id]).update(col => row[:count])
            end
          end
        end

        private

        # Collects the field pairs of every non-blocking comparison.
        #
        # @field_pairs      - the field objects of each comparison
        # @phase_one_pairs  - the fields' column names (as symbols)
        # @phase_two_pairs  - [:pair_N, :pair_N] columns of the groups table;
        #                     nil for self-linkage, which has no second phase
        def setup_pairs
          @field_pairs = []
          @phase_one_pairs = []
          @phase_two_pairs = @type == 'self-linkage' ? nil : []
          @matcher.comparisons.each do |comparison|
            if !comparison.blocking?
              fields = comparison.fields
              @field_pairs << fields
              @phase_one_pairs.push(fields.collect(&:name_sym))
              if @phase_two_pairs
                pair_sym = :"pair_#{@phase_two_pairs.length}"
                @phase_two_pairs.push([pair_sym, pair_sym])
              end
            end
          end
        end

        # Creates (replacing any existing) the tables for this run:
        # the groups table, the groups_records join table and, unless this
        # is a self-linkage, the groups_groups table used by phase two.
        #
        # Raises RuntimeError if the two fields of a pair have different
        # column types.
        def create_tables
          # Yes, this could be done during setup_pairs, but I think
          # this is more clean.  Also, there will only be very few pairs.
          groups_columns = [
            {:name => :id, :type => Integer, :primary_key => true}
          ]
          @group_value_fields = []
          @field_pairs.each_with_index do |(field_1, field_2), i|
            type_1 = field_1.local_column_options[:type]
            type_2 = field_2.local_column_options[:type]
            if type_1 != type_2
              # FIXME!
              raise "BOOM!!"
            end
            @group_value_fields.push([field_1.name_sym, field_2.name_sym])
            groups_columns.push({:name => :"pair_#{i}", :type => type_1})
          end
          # Captured before the summary columns are added, so the groups
          # buffer only writes the id and pair_N columns.
          @groups_column_names = groups_columns.collect { |c| c[:name] }
          @groups_table_name = :"groups_#{@run_number}"

          # Add extra columns to the groups table for summary stats
          @resources.length.times do |i|
            groups_columns << {:name => :"resource_#{i+1}_count", :type => Integer}
          end

          # Fall back to String when the resources' key types differ.
          key_types = @resources.collect { |r| r.primary_key_type }.uniq
          record_id_type = key_types.length == 1 ? key_types[0] : String
          @join_table_name = :"groups_records_#{@run_number}"

          @parent.local_database do |scenario_db|
            scenario_db.create_table!(@groups_table_name) do
              columns.push(*groups_columns)
            end
            scenario_db.create_table!(@join_table_name) do
              column :record_id, record_id_type
              Integer :which
              Integer :group_id, :index => true
              index [:group_id, :which, :record_id]   # speedy filtering when showing results
            end
            if @type != 'self-linkage'
              # Need another groups table
              @secondary_groups_table_name = :"groups_groups_#{@run_number}"
              scenario_db.create_table!(@secondary_groups_table_name) do
                Integer :group_1_id
                Integer :group_2_id
              end
            end
          end
        end

        # Starts and returns a thread that groups the records of one
        # dataset.
        #
        # dataset     - the resource's dataset
        # primary_key - name (symbol) of the dataset's primary key column
        # which       - nil for self-linkage, otherwise the resource index
        #
        # The dataset is split into segments of LIMIT rows, each handled by
        # its own thread; afterwards the rows on either side of each segment
        # boundary are compared so groups spanning a boundary get merged.
        def phase_one_thread(dataset, primary_key, which)
          # NOTE: the outer and inner threads deliberately use different
          # variable names.  A block closes over the variable being assigned
          # to, so reusing one name would let the inner assignment overwrite
          # the value this method returns.
          resource_thread = Thread.new do
            # Apply filters and what not from comparisons
            dataset = dataset.select(primary_key)
            @matcher.comparisons.each do |comparison|
              dataset = comparison.apply(dataset, which)
            end
            dataset = dataset.order_more(primary_key)

            # Do the work
            local_tw = ThreadsWait.new
            segment_threads = []
            count = dataset.count
            segments = count / LIMIT
            segments += 1 if count % LIMIT > 0

            segments.times do |segment_num|
              ds = dataset.limit(LIMIT, LIMIT * segment_num)
              segment_thread = Thread.new do
                prev_row = nil
                group_id = nil

                ds.each_with_index do |row, row_num|
                  if row_num > 0
                    result = compare_rows(prev_row, row, which)
                    if (result || which) && group_id.nil?
                      # If `which` is not nil, that means we're in the first
                      # stage of a dual or cross linkage.  So, we should save
                      # groups that only have 1 record in them.
                      group_id = create_group(prev_row, which)
                      @join_buffer.add([prev_row[primary_key], which, group_id])
                    end

                    # This stores the first record of this segment in order
                    # to check it against the last record of the previous
                    # segment.  It has to happen before group_id is reset
                    # below, otherwise the id of a single-record group
                    # created for the first row would be lost.
                    if segment_num > 0 && row_num == 1
                      Thread.current[:head] = { :row => prev_row, :group_id => group_id }
                    end

                    if result
                      @join_buffer.add([row[primary_key], which, group_id])
                    else
                      group_id = nil
                    end
                  end
                  prev_row = row
                end
                if which && group_id.nil?
                  # See above comment about `which`
                  group_id = create_group(prev_row, which)
                  @join_buffer.add([prev_row[primary_key], which, group_id])
                end

                # This stores the last record of this segment in order to
                # check it against the first record of the next segment.
                if segment_num < (segments - 1)
                  Thread.current[:tail] = { :row => prev_row, :group_id => group_id }
                end
              end
              segment_thread.abort_on_exception = true
              segment_threads << segment_thread
              local_tw.join_nowait(segment_thread)
              local_tw.next_wait if local_tw.threads.length == 10
            end
            local_tw.all_waits
            @groups_buffer.flush
            @join_buffer.flush

            # Compare heads and tails from segments
            merge_segment_boundaries(segment_threads, which) if segments > 1
          end
          resource_thread.abort_on_exception = true
          resource_thread
        end

        # Merges groups that were split by a segment boundary.
        #
        # segment_threads - the finished segment threads, in segment order;
        #                   each is read through [:head] and [:tail], which
        #                   hold { :row => ..., :group_id => ... } or nil
        # which           - see phase_one_thread
        #
        # When the last row of a segment matches the first row of the next
        # one, the records of the later group are moved to the earlier group
        # and the later group is deleted.
        #
        # FIXME: when a boundary row has no group (self-linkage only) the
        # match is skipped; linking it would require creating or extending a
        # group here.  A segment holding a single row has no head and is not
        # compared either.
        def merge_segment_boundaries(segment_threads, which)
          merged = {}   # deleted group id => id of the group it was merged into
          tail = nil
          segment_threads.each do |segment_thread|
            head = segment_thread[:head]
            if tail && head && tail[:group_id] && head[:group_id] &&
                compare_rows(tail[:row], head[:row], which)
              # The tail's group may itself have been merged away at the
              # previous boundary (a segment consisting of a single group).
              tail_group_id = merged.fetch(tail[:group_id], tail[:group_id])
              head_group_id = head[:group_id]
              @join_dataset.filter({
                :group_id => head_group_id
              }).update(:group_id => tail_group_id)
              @groups_dataset.filter({
                :id => head_group_id
              }).delete
              merged[head_group_id] = tail_group_id
            end
            tail = segment_thread[:tail]
          end
        end

        # Second pass for dual and cross linkage: sorts the groups created in
        # phase one on their pair_N columns and records each pair of adjacent
        # groups with equal values in the join buffer (which points at the
        # groups_groups table during this phase).
        #
        # dataset     - the groups dataset
        # primary_key - name (symbol) of its key column
        def phase_two(dataset, primary_key)
          # Add sorting only
          @pairs.each do |fields|
            dataset = dataset.order_more(*fields.uniq)
          end
          dataset = dataset.order_more(primary_key)

          local_tw = ThreadsWait.new
          threads = []
          count = dataset.count
          segments = count / LIMIT
          segments += 1 if count % LIMIT > 0

          segments.times do |segment_num|
            ds = dataset.limit(LIMIT, LIMIT * segment_num)
            thread = Thread.new do
              prev_result = nil
              prev_row = nil
              skip = false

              ds.each_with_index do |row, row_num|
                if row_num > 0 && !skip
                  result = compare_rows(prev_row, row)
                  # There should only be one-to-one matches here.
                  if result
                    @join_buffer.add([prev_row[primary_key], row[primary_key]])
                    skip = true   # Skip comparing this row to the next
                  end

                  # Only an unmatched first row can still match the
                  # previous segment's last row.
                  if segment_num > 0 && row_num == 1 && !result
                    Thread.current[:head] = { :row => prev_row }
                  end
                elsif skip
                  skip = false
                end
                prev_row = row
                prev_result = result
              end
              # Likewise, only an unmatched last row is kept as the tail.
              if segment_num < (segments - 1) && !prev_result
                Thread.current[:tail] = { :row => prev_row }
              end
            end
            thread.abort_on_exception = true
            threads << thread
            local_tw.join_nowait(thread)
            local_tw.next_wait if local_tw.threads.length == 10
          end
          local_tw.all_waits
          @join_buffer.flush

          # Compare heads and tails from segments.  Matches found here stay
          # in the buffer; the caller flushes it.
          if threads.length > 1
            head = tail = nil
            threads.each do |thread|
              if tail
                head = thread[:head]
                if head && compare_rows(tail[:row], head[:row])
                  @join_buffer.add([tail[:row][primary_key], head[:row][primary_key]])
                end
              end
              tail = thread[:tail]
            end
          end
        end

        # Compares two rows on every pair in @pairs.
        #
        # which - when nil, the first field of each pair is read from row_1
        #         and the second from row_2; otherwise field number `which`
        #         is read from both rows.
        #
        # Returns the array of shared values when every pair is equal
        # (an empty array when there are no pairs), nil otherwise.
        def compare_rows(row_1, row_2, which = nil)
          values = []
          @pairs.each do |fields|
            value_1 = row_1[fields[which || 0]]
            value_2 = row_2[fields[which || 1]]
            if value_1 == value_2
              values << value_1
            else
              values = nil
              break
            end
          end
          values
        end

        # Queues a new group row (id plus the row's value for each field
        # pair) in the groups buffer and returns the new group's id.
        def create_group(row, which)
          group_id = get_next_group_id
          group_row = [group_id]
          @group_value_fields.each do |fields|
            group_row.push(row[fields[which || 0]])
          end
          @groups_buffer.add(group_row)
          group_id
        end

        # Returns the next unused group id.  Guarded by a mutex because it is
        # called from several threads.
        def get_next_group_id
          @mutex.synchronize { @group_number += 1 }
        end
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
module Coupler
  module Models
    # A linkage scenario: one or two resources of a project plus the matcher
    # used to link their records.
    class Scenario < Sequel::Model
      # Raised by run! when the scenario has no matcher.
      #
      # Both errors derive from StandardError (not Exception) so that a plain
      # `rescue` catches them; rescuing them by name works as before.
      class NoMatcherError < StandardError; end

      # Raised by run! when one of the scenario's resources is out of date.
      class ResourcesOutOfDateError < StandardError; end

      include CommonModel
      include Jobify

      # Resource ids to assign; consumed by before_validation.
      attr_writer :resource_ids
      many_to_one :project
      many_to_one :resource_1, :class => "Coupler::Models::Resource"
      many_to_one :resource_2, :class => "Coupler::Models::Resource"
      one_to_one :matcher
      one_to_many :results

      # Returns a hash mapping project ids to their number of scenarios.
      def self.count_by_project
        dataset.naked.group_and_count(:project_id).to_hash(:project_id, :count)
      end

      # Returns "no_matcher", "resources_out_of_date" or "ok".
      def status
        if matcher.nil?
          "no_matcher"
        elsif resources.any? { |r| r.status == "out_of_date" }
          "resources_out_of_date"
        else
          "ok"
        end
      end

      # Returns the scenario's resources as an array of zero, one or two
      # elements.  resource_2 is ignored when resource_1 is missing.
      def resources
        if resource_1
          resource_2 ? [resource_1, resource_2] : [resource_1]
        else
          []
        end
      end

      # Connects to this scenario's own database and passes the connection
      # and the given block on to Sequel.connect.
      def local_database(&block)
        Sequel.connect(local_connection_string, {
          :loggers => [Coupler::Logger.instance],
          :max_connections => 50,
          :pool_timeout => 60
        }, &block)
      end

      # Runs the scenario, then bumps run_count, stamps last_run_at and
      # saves a Result for the new run number.
      #
      # progress - optional block handed to the Runner.
      #
      # Raises NoMatcherError or ResourcesOutOfDateError when the scenario's
      # status is not "ok".
      def run!(&progress)
        case status
        when 'no_matcher'             then raise NoMatcherError
        when 'resources_out_of_date'  then raise ResourcesOutOfDateError
        end

        runner = Runner.new(self, &progress)
        runner.run!

        update(:run_count => run_count + 1, :last_run_at => Time.now)
        result = Result.new(:scenario => self, :run_number => run_count)
        result.save
      end

      # Derives linkage_type from the assigned resources and the matcher:
      # two resources give "dual-linkage"; one resource gives
      # "cross-linkage" when the matcher cross-matches and "self-linkage"
      # otherwise; no resource gives "N/A".
      def set_linkage_type
        if resource_1
          if resource_2
            self.linkage_type = "dual-linkage"
          elsif matcher && matcher.cross_match?
            self.linkage_type = "cross-linkage"
          else
            self.linkage_type = "self-linkage"
          end
        else
          self.linkage_type = "N/A"
        end
      end

      private

      # Connection string of the per-scenario database ("scenario_<id>").
      def local_connection_string
        Base.connection_string("scenario_#{id}")
      end

      # Turns @resource_ids (when it is an array) into resource_1_id and
      # resource_2_id.  Only the first two ids are considered and they are
      # looked up through the project, so ids that are not among the
      # project's resources are dropped.
      def before_validation
        super
        if @resource_ids.is_a?(Array)
          objects = project.resources_dataset.filter(:id => @resource_ids[0..1].compact).all
          self.resource_1_id = objects[0].nil? ? nil : objects[0].id
          self.resource_2_id = objects[1].nil? ? nil : objects[1].id
        end
      end

      # Requires a name that is unique within the project and at least one
      # resource.
      def validate
        super
        validates_presence :name
        validates_unique [:name, :project_id]
        if resource_1_id.nil?
          errors.add(:base, "At least one resource is required")
        end
      end

      # Fills in the slug (lower-cased name with whitespace runs replaced by
      # underscores) unless one is set, and the linkage type.
      def before_create
        super
        self.slug ||= name.downcase.gsub(/\s+/, "_")
        set_linkage_type
      end

      # There is currently no plan to let a user change resources
      # after a scenario has been created.
      #def before_update
        #super
        #set_linkage_type
      #end
    end
  end
end
114
+
115
+ require File.join(File.dirname(__FILE__), 'scenario', 'runner')
@@ -0,0 +1,117 @@
1
module Coupler
  module Models
    # Applies a transformer to one field of a resource, writing the outcome
    # either back to the same field or to a separate result field.
    class Transformation < Sequel::Model
      include CommonModel
      many_to_one :resource
      many_to_one :source_field, :class => Field
      many_to_one :result_field, :class => Field
      many_to_one :transformer

      plugin :nested_attributes
      nested_attributes :result_field
      nested_attributes :transformer, :destroy => false

      # Runs the transformer on `data`, reading from the source field's
      # column and writing to the result field's column.
      def transform(data)
        columns = {
          :in => source_field.name.to_sym,
          :out => result_field.name.to_sym
        }
        transformer.transform(data, columns)
      end

      # The transformer's field changes for the source field.
      def field_changes
        transformer.field_changes(source_field)
      end

      # NOTE: The alias below intentionally has no trailing =.  A call to a
      # method whose name ends in = always evaluates to the right-hand side,
      # whatever the method returns, and the object built by the nested
      # attributes setter can only be obtained from its return value.
      #
      alias_method :original_result_field_attributes, :result_field_attributes=

      # Builds the result field from `h` (always marked as generated) and
      # remembers it as the staged result field.
      def result_field_attributes=(h)
        attrs = h.merge({ :is_generated => true })
        @staged_result_field = original_result_field_attributes(attrs)
      end

      # True when this transformation holds the highest position and its
      # result field is missing, not generated, or not used by any scenario.
      def deletable?
        return false unless position == self.class.max(:position)

        field = result_field
        field.nil? || !field.is_generated || field.scenarios_dataset.count == 0
      end

      private

      # Defaults the result field to the source field when no result field
      # was given, and completes a staged result field with the type
      # information reported by the transformer.
      def before_validation
        super
        no_result_field = !result_field_id && !@staged_result_field
        self.result_field_id = source_field_id if source_field_id && no_result_field

        return unless @staged_result_field && transformer && source_field && resource_id

        changes = transformer.field_changes(source_field).values[0]
        if changes.empty?
          # The transformer reports no change: inherit the source's types.
          changes.update({
            :type => source_field[:type],
            :db_type => source_field[:db_type]
          })
        end
        changes[:resource_id] = resource_id
        @staged_result_field.set(changes)
      end

      # Requires a resource, a source field and a transformer.  When those
      # are present, the fields must belong to the resource and the source
      # field's type must be one the transformer accepts.
      def validate
        super
        validates_presence [:resource_id, :source_field_id]
        errors.add(:transformer_id, "is not present") if transformer.nil?
        return unless errors.empty?

        input = resource.fields_dataset[:id => source_field_id]
        if input.nil?
          errors.add(:source_field_id, "is invalid")
        elsif transformer.allowed_types.is_a?(Array) && !transformer.allowed_types.include?(input.final_type)
          errors.add(:base, "#{transformer.name} cannot transform type '#{input.final_type}'")
        end

        unless @staged_result_field
          output = result_field_id ? resource.fields_dataset[:id => result_field_id] : nil
          errors.add(:result_field_id, "is invalid") if output.nil?
        end
      end

      # Puts a new transformation after the resource's existing ones unless
      # a position was given.
      def before_create
        super
        self.position ||= self.class.filter(:resource_id => resource_id).count + 1
      end

      def after_save
        super
        resource.refresh_fields!
      end

      def before_destroy
        # Only the last transformation may be destroyed; the value of
        # deletable? is handed back to the caller of this hook.
        #
        # TODO: the complex logic needed to delete from the middle of a
        # transformation stack has been put off.
        #
        super
        deletable?
      end

      # Destroys a generated result field once no transformation refers to
      # it anymore, then refreshes the resource's fields.
      def after_destroy
        super
        field = result_field
        orphaned = field && field.is_generated &&
          self.class.filter(:result_field_id => field.id).count == 0
        field.destroy if orphaned
        resource.refresh_fields! if resource
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Coupler
  module Models
    class Transformer
      # Evaluates a transformer's code against one input value.
      #
      # Every public instance method except instance_eval and the
      # double-underscore methods is removed, so the evaluated code sees
      # little more than `value`.
      class Runner
        instance_methods.each do |m|
          undef_method m unless m =~ /^__|^instance_eval$/
        end

        # code  - String of Ruby source to evaluate
        # input - the value made available to the code as `value`
        def initialize(code, input)
          @input = input
          @code = code
        end

        # Evaluates the code in the context of this object and returns its
        # result.
        #
        # NOTE(review): this executes arbitrary Ruby.  Removing public
        # methods is not a sandbox (private Kernel methods and constants are
        # still reachable), so the code must come from a trusted source.
        def run
          instance_eval(@code, __FILE__, __LINE__)
        end

        # The input value, for use by the evaluated code.
        def value
          @input
        end

        # Any other method call is rejected with NoMethodError.  Arguments
        # and a block are accepted so that calls such as `foo(1, 2)` raise
        # NoMethodError as well, rather than an ArgumentError about this
        # method's own arity.
        def method_missing(name, *args, &block)
          raise NoMethodError, "undefined method `#{name}' in transformer code"
        end
      end
    end
  end
end