coupler 0.0.1-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rvmrc +1 -0
- data/.vimrc +40 -0
- data/Gemfile +27 -0
- data/Gemfile.lock +71 -0
- data/LICENSE +20 -0
- data/NOTES +6 -0
- data/README.rdoc +18 -0
- data/Rakefile +42 -0
- data/TODO +11 -0
- data/VERSION +1 -0
- data/bin/coupler +7 -0
- data/db/.gitignore +6 -0
- data/db/migrate/001_initial_schema.rb +166 -0
- data/db/migrate/002_stub.rb +4 -0
- data/db/migrate/003_stub.rb +4 -0
- data/db/migrate/004_create_comparisons.rb +28 -0
- data/db/migrate/005_move_database_name.rb +19 -0
- data/db/migrate/006_upgrade_comparisons.rb +34 -0
- data/db/migrate/007_add_which_to_comparisons.rb +23 -0
- data/db/migrate/008_add_result_field_to_transformations.rb +33 -0
- data/db/migrate/009_add_generated_flag_to_fields.rb +13 -0
- data/db/migrate/010_create_imports.rb +24 -0
- data/db/migrate/011_add_primary_key_type.rb +13 -0
- data/db/migrate/012_add_transformed_with_to_resources.rb +13 -0
- data/db/migrate/013_add_run_count_to_scenarios.rb +13 -0
- data/db/migrate/014_add_last_accessed_at_to_some_tables.rb +13 -0
- data/db/migrate/015_add_run_number_to_results.rb +15 -0
- data/db/migrate/016_fix_scenario_run_count.rb +27 -0
- data/db/migrate/017_rename_comparison_columns.rb +14 -0
- data/db/migrate/018_fix_scenario_linkage_type.rb +8 -0
- data/db/migrate/019_add_columns_to_imports.rb +24 -0
- data/db/migrate/020_rename_import_columns.rb +12 -0
- data/db/migrate/021_add_fields_to_connections.rb +15 -0
- data/db/migrate/022_remove_database_name_from_resources.rb +11 -0
- data/features/connections.feature +28 -0
- data/features/matchers.feature +35 -0
- data/features/projects.feature +11 -0
- data/features/resources.feature +62 -0
- data/features/scenarios.feature +45 -0
- data/features/step_definitions/coupler_steps.rb +145 -0
- data/features/step_definitions/matchers_steps.rb +26 -0
- data/features/step_definitions/resources_steps.rb +12 -0
- data/features/step_definitions/scenarios_steps.rb +7 -0
- data/features/step_definitions/transformations_steps.rb +3 -0
- data/features/support/env.rb +128 -0
- data/features/transformations.feature +22 -0
- data/features/wizard.feature +10 -0
- data/gfx/coupler-header.svg +213 -0
- data/gfx/coupler-sidebar.svg +656 -0
- data/gfx/coupler.svg +184 -0
- data/gfx/icon.svg +75 -0
- data/lib/coupler/base.rb +63 -0
- data/lib/coupler/config.rb +128 -0
- data/lib/coupler/data_uploader.rb +20 -0
- data/lib/coupler/database.rb +31 -0
- data/lib/coupler/extensions/connections.rb +57 -0
- data/lib/coupler/extensions/exceptions.rb +58 -0
- data/lib/coupler/extensions/imports.rb +43 -0
- data/lib/coupler/extensions/jobs.rb +21 -0
- data/lib/coupler/extensions/matchers.rb +64 -0
- data/lib/coupler/extensions/projects.rb +62 -0
- data/lib/coupler/extensions/resources.rb +89 -0
- data/lib/coupler/extensions/results.rb +100 -0
- data/lib/coupler/extensions/scenarios.rb +50 -0
- data/lib/coupler/extensions/transformations.rb +70 -0
- data/lib/coupler/extensions/transformers.rb +58 -0
- data/lib/coupler/extensions.rb +16 -0
- data/lib/coupler/helpers.rb +121 -0
- data/lib/coupler/import_buffer.rb +48 -0
- data/lib/coupler/logger.rb +16 -0
- data/lib/coupler/models/common_model.rb +104 -0
- data/lib/coupler/models/comparison.rb +166 -0
- data/lib/coupler/models/connection.rb +59 -0
- data/lib/coupler/models/field.rb +55 -0
- data/lib/coupler/models/import.rb +238 -0
- data/lib/coupler/models/job.rb +42 -0
- data/lib/coupler/models/jobify.rb +17 -0
- data/lib/coupler/models/matcher.rb +36 -0
- data/lib/coupler/models/project.rb +40 -0
- data/lib/coupler/models/resource.rb +287 -0
- data/lib/coupler/models/result.rb +92 -0
- data/lib/coupler/models/scenario/runner.rb +357 -0
- data/lib/coupler/models/scenario.rb +115 -0
- data/lib/coupler/models/transformation.rb +117 -0
- data/lib/coupler/models/transformer/runner.rb +28 -0
- data/lib/coupler/models/transformer.rb +110 -0
- data/lib/coupler/models.rb +30 -0
- data/lib/coupler/runner.rb +76 -0
- data/lib/coupler/scheduler.rb +56 -0
- data/lib/coupler.rb +34 -0
- data/log/.gitignore +1 -0
- data/misc/README +5 -0
- data/misc/jruby-json.license +57 -0
- data/misc/rack-flash.license +22 -0
- data/script/dbconsole.rb +5 -0
- data/src/edu/vanderbilt/coupler/Main.java +116 -0
- data/src/edu/vanderbilt/coupler/jruby.properties +1 -0
- data/tasks/annotations.rake +84 -0
- data/tasks/db.rake +120 -0
- data/tasks/environment.rake +12 -0
- data/tasks/jeweler.rake +43 -0
- data/tasks/package.rake +58 -0
- data/tasks/rdoc.rake +13 -0
- data/tasks/test.rake +63 -0
- data/tasks/vendor.rake +43 -0
- data/test/README.txt +6 -0
- data/test/config.yml +9 -0
- data/test/coupler/models/test_import.rb +221 -0
- data/test/factories.rb +91 -0
- data/test/fixtures/duplicate-keys.csv +5 -0
- data/test/fixtures/no-headers.csv +50 -0
- data/test/fixtures/people.csv +51 -0
- data/test/fixtures/varying-row-size.csv +4 -0
- data/test/helper.rb +156 -0
- data/test/integration/extensions/test_connections.rb +80 -0
- data/test/integration/extensions/test_imports.rb +94 -0
- data/test/integration/extensions/test_jobs.rb +52 -0
- data/test/integration/extensions/test_matchers.rb +134 -0
- data/test/integration/extensions/test_projects.rb +82 -0
- data/test/integration/extensions/test_resources.rb +150 -0
- data/test/integration/extensions/test_results.rb +89 -0
- data/test/integration/extensions/test_scenarios.rb +88 -0
- data/test/integration/extensions/test_transformations.rb +113 -0
- data/test/integration/extensions/test_transformers.rb +80 -0
- data/test/integration/test_field.rb +45 -0
- data/test/integration/test_import.rb +78 -0
- data/test/integration/test_running_scenarios.rb +379 -0
- data/test/integration/test_transformation.rb +56 -0
- data/test/integration/test_transforming.rb +154 -0
- data/test/table_sets.rb +76 -0
- data/test/unit/models/test_common_model.rb +130 -0
- data/test/unit/models/test_comparison.rb +619 -0
- data/test/unit/models/test_connection.rb +115 -0
- data/test/unit/models/test_field.rb +99 -0
- data/test/unit/models/test_import.rb +130 -0
- data/test/unit/models/test_job.rb +115 -0
- data/test/unit/models/test_matcher.rb +82 -0
- data/test/unit/models/test_project.rb +102 -0
- data/test/unit/models/test_resource.rb +564 -0
- data/test/unit/models/test_result.rb +90 -0
- data/test/unit/models/test_scenario.rb +199 -0
- data/test/unit/models/test_transformation.rb +193 -0
- data/test/unit/models/test_transformer.rb +188 -0
- data/test/unit/test_base.rb +60 -0
- data/test/unit/test_data_uploader.rb +27 -0
- data/test/unit/test_database.rb +23 -0
- data/test/unit/test_helpers.rb +58 -0
- data/test/unit/test_logger.rb +10 -0
- data/test/unit/test_models.rb +12 -0
- data/test/unit/test_runner.rb +76 -0
- data/test/unit/test_scheduler.rb +66 -0
- data/uploads/.gitignore +2 -0
- data/vendor/java/.gitignore +5 -0
- data/webroot/public/css/960.css +1 -0
- data/webroot/public/css/dataTables.css +1057 -0
- data/webroot/public/css/jquery-ui.css +572 -0
- data/webroot/public/css/jquery.treeview.css +68 -0
- data/webroot/public/css/reset.css +1 -0
- data/webroot/public/css/style.css +504 -0
- data/webroot/public/css/text.css +1 -0
- data/webroot/public/favicon.ico +0 -0
- data/webroot/public/images/12_col.gif +0 -0
- data/webroot/public/images/16_col.gif +0 -0
- data/webroot/public/images/add.png +0 -0
- data/webroot/public/images/ajax-loader.gif +0 -0
- data/webroot/public/images/cog.png +0 -0
- data/webroot/public/images/coupler.png +0 -0
- data/webroot/public/images/foo.png +0 -0
- data/webroot/public/images/hammer.png +0 -0
- data/webroot/public/images/header.png +0 -0
- data/webroot/public/images/home.gif +0 -0
- data/webroot/public/images/jobs.gif +0 -0
- data/webroot/public/images/sidebar-bottom.png +0 -0
- data/webroot/public/images/sidebar.png +0 -0
- data/webroot/public/images/treeview-default-line.gif +0 -0
- data/webroot/public/images/treeview-default.gif +0 -0
- data/webroot/public/images/ui-anim_basic_16x16.gif +0 -0
- data/webroot/public/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/webroot/public/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/webroot/public/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/webroot/public/images/ui-bg_highlight-hard_30_565356_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-hard_75_888588_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-soft_30_6e3b3a_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-soft_35_8e8b8e_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/webroot/public/images/ui-icons_222222_256x240.png +0 -0
- data/webroot/public/images/ui-icons_2e83ff_256x240.png +0 -0
- data/webroot/public/images/ui-icons_454545_256x240.png +0 -0
- data/webroot/public/images/ui-icons_888888_256x240.png +0 -0
- data/webroot/public/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/webroot/public/images/ui-icons_ffffff_256x240.png +0 -0
- data/webroot/public/js/ajaxupload.js +673 -0
- data/webroot/public/js/application.js +40 -0
- data/webroot/public/js/jquery-ui.combobox.js +98 -0
- data/webroot/public/js/jquery-ui.js +9867 -0
- data/webroot/public/js/jquery-ui.min.js +559 -0
- data/webroot/public/js/jquery.dataTables.min.js +587 -0
- data/webroot/public/js/jquery.min.js +154 -0
- data/webroot/public/js/jquery.timeago.js +140 -0
- data/webroot/public/js/jquery.tooltip.min.js +19 -0
- data/webroot/public/js/jquery.treeview.min.js +15 -0
- data/webroot/public/js/resource.js +11 -0
- data/webroot/public/js/results.js +56 -0
- data/webroot/public/js/transformations.js +95 -0
- data/webroot/views/connections/index.erb +5 -0
- data/webroot/views/connections/list.erb +34 -0
- data/webroot/views/connections/new.erb +55 -0
- data/webroot/views/connections/show.erb +36 -0
- data/webroot/views/imports/edit.erb +60 -0
- data/webroot/views/imports/form.erb +81 -0
- data/webroot/views/imports/new.erb +89 -0
- data/webroot/views/index.erb +12 -0
- data/webroot/views/jobs/index.erb +7 -0
- data/webroot/views/jobs/list.erb +24 -0
- data/webroot/views/layout.erb +38 -0
- data/webroot/views/matchers/form.erb +250 -0
- data/webroot/views/matchers/list.erb +32 -0
- data/webroot/views/projects/form.erb +14 -0
- data/webroot/views/projects/index.erb +96 -0
- data/webroot/views/projects/show.erb +24 -0
- data/webroot/views/resources/edit.erb +88 -0
- data/webroot/views/resources/index.erb +5 -0
- data/webroot/views/resources/list.erb +27 -0
- data/webroot/views/resources/new.erb +121 -0
- data/webroot/views/resources/show.erb +86 -0
- data/webroot/views/resources/transform.erb +2 -0
- data/webroot/views/results/csv.erb +12 -0
- data/webroot/views/results/details.erb +15 -0
- data/webroot/views/results/index.erb +2 -0
- data/webroot/views/results/list.erb +22 -0
- data/webroot/views/results/record.erb +24 -0
- data/webroot/views/results/show.erb +68 -0
- data/webroot/views/scenarios/index.erb +5 -0
- data/webroot/views/scenarios/list.erb +20 -0
- data/webroot/views/scenarios/new.erb +99 -0
- data/webroot/views/scenarios/run.erb +2 -0
- data/webroot/views/scenarios/show.erb +50 -0
- data/webroot/views/sidebar.erb +106 -0
- data/webroot/views/transformations/create.erb +115 -0
- data/webroot/views/transformations/for.erb +16 -0
- data/webroot/views/transformations/index.erb +2 -0
- data/webroot/views/transformations/list.erb +29 -0
- data/webroot/views/transformations/new.erb +126 -0
- data/webroot/views/transformations/preview.erb +46 -0
- data/webroot/views/transformers/edit.erb +6 -0
- data/webroot/views/transformers/form.erb +58 -0
- data/webroot/views/transformers/index.erb +2 -0
- data/webroot/views/transformers/list.erb +25 -0
- data/webroot/views/transformers/new.erb +5 -0
- data/webroot/views/transformers/preview.erb +23 -0
- data/webroot/views/transformers/show.erb +0 -0
- metadata +558 -0
@@ -0,0 +1,357 @@
|
|
1
|
+
module Coupler
  module Models
    class Scenario
      # Runs a scenario: groups records of the scenario's resource(s) whose
      # compared fields are equal and writes the groups into per-run tables
      # (groups_N, groups_records_N) in the scenario's local database.
      #
      # Phase one groups the records of each resource separately. For
      # anything other than self-linkage, phase two then pairs up groups
      # (one from each side) that hold equal values and merges them.
      #
      # Rows are compared only against their immediate neighbour, so the
      # datasets are expected to arrive sorted on the compared fields
      # (NOTE(review): presumably Comparison#apply adds that ordering --
      # confirm).
      class Runner
        # Number of rows processed by one segment thread.
        LIMIT = 10000

        # Maximum number of segment threads allowed to run before waiting
        # for one of them to finish.
        MAX_SEGMENT_THREADS = 10

        # Number of group pairs merged concurrently in phase two.
        MERGE_BATCH_SIZE = 10

        # parent   - the Scenario being run; supplies the matcher, linkage
        #            type, resources, run count and local database.
        # progress - optional block; stored but not called by this class.
        #
        # Creates the result tables for this run as a side effect.
        def initialize(parent, &progress)
          @parent = parent
          @progress = progress
          @matcher = parent.matcher
          @type = parent.linkage_type
          if @type == 'cross-linkage'
            # Cross-matching a single resource is treated like a
            # dual-linkage of the resource with itself.
            @resources = [parent.resource_1, parent.resource_1]
          else
            @resources = parent.resources
          end
          @run_number = @parent.run_count + 1
          @mutex = Mutex.new
          @group_number = 0
          setup_pairs
          create_tables
        end

        # Performs the linkage and fills the result tables.
        def run!
          @parent.local_database do |scenario_db|
            @groups_dataset = scenario_db[@groups_table_name]
            @groups_buffer = ImportBuffer.new(@groups_column_names, @groups_dataset)
            @join_dataset = scenario_db[@join_table_name]
            @join_buffer = ImportBuffer.new([:record_id, :which, :group_id], @join_dataset)

            # Group records for each dataset. This step is the same for both
            # self-linkage and dual-linkage. However, this is the only step
            # for self-linkage. Dual-linkage requires another pass after
            # this.
            #
            # Cross-matching on a single dataset is treated as a dual-linkage.
            #
            @pairs = @phase_one_pairs
            tw = ThreadsWait.new
            databases_to_close = []
            @resources.each_with_index do |resource, i|
              dataset = resource.final_dataset
              databases_to_close << dataset.db
              primary_key = resource.primary_key_sym
              which = @type == 'self-linkage' ? nil : i
              resource_thread = phase_one_thread(dataset, primary_key, which)
              tw.join_nowait(resource_thread)
            end
            tw.all_waits
            databases_to_close.each do |db| # tidy up
              db.disconnect
              ::Sequel::DATABASES.delete(db)
            end

            if @type != 'self-linkage'
              # Phase 2!
              secondary_groups_ds = scenario_db[@secondary_groups_table_name].order(:group_1_id, :group_2_id)
              @join_buffer = ImportBuffer.new([:group_1_id, :group_2_id], secondary_groups_ds)
              @pairs = @phase_two_pairs
              phase_two(@groups_dataset, :id)
              @join_buffer.flush

              # Update groups and groups_records: every matched pair of
              # groups is merged into one group with a fresh id.
              tw = ThreadsWait.new
              count = secondary_groups_ds.count
              offset = 0
              # Every id handed out so far belongs to a phase-one group;
              # merged groups get ids above this mark.
              last_group_id = @group_number
              while offset < count
                dataset = secondary_groups_ds.limit(MERGE_BATCH_SIZE, offset)
                offset += MERGE_BATCH_SIZE
                dataset.each do |row|
                  merge_thread = Thread.new(row[:group_1_id], row[:group_2_id]) do |group_1_id, group_2_id|
                    new_group_id = get_next_group_id
                    @join_dataset.filter(:group_id => [group_1_id, group_2_id]).update(:group_id => new_group_id)
                    @groups_dataset.filter(:id => group_1_id).update(:id => new_group_id)
                    @groups_dataset.filter(:id => group_2_id).delete
                  end
                  merge_thread.abort_on_exception = true
                  tw.join_nowait(merge_thread)
                end
                tw.all_waits
              end

              # Clean up groups and records that don't match
              @groups_dataset.filter(:id <= last_group_id).delete
              @join_dataset.filter(:group_id <= last_group_id).delete

              # Don't need the secondary table anymore
              scenario_db.drop_table(@secondary_groups_table_name)
            end

            # Calculate some summary stats
            @join_dataset.group_and_count(:group_id, :which).each do |row|
              col = row[:which] ? :"resource_#{row[:which]+1}_count" : :"resource_1_count"
              @groups_dataset.filter(:id => row[:group_id]).update(col => row[:count])
            end
          end
        end

        private

        # Builds the field lists used for comparing rows from the matcher's
        # non-blocking comparisons:
        #   @field_pairs     - the field objects of each comparison
        #   @phase_one_pairs - their column names (symbols)
        #   @phase_two_pairs - [:pair_N, :pair_N] columns of the groups
        #                      table; nil for self-linkage
        def setup_pairs
          @field_pairs = []
          @phase_one_pairs = []
          @phase_two_pairs = @type == 'self-linkage' ? nil : []
          @matcher.comparisons.each do |comparison|
            if !comparison.blocking?
              fields = comparison.fields
              @field_pairs << fields
              @phase_one_pairs.push(fields.collect(&:name_sym))
              if @phase_two_pairs
                pair_sym = :"pair_#{@phase_two_pairs.length}"
                @phase_two_pairs.push([pair_sym, pair_sym])
              end
            end
          end
        end

        # Creates (replacing any existing) the groups and groups_records
        # tables for this run, plus the groups_groups table when the run is
        # not a self-linkage.
        #
        # Raises RuntimeError when the two fields of a comparison have
        # different local column types.
        def create_tables
          # Yes, this could be done during setup_pairs, but I think
          # this is more clean. Also, there will only be very few pairs.
          groups_columns = [
            {:name => :id, :type => Integer, :primary_key => true}
          ]
          @group_value_fields = []
          @field_pairs.each_with_index do |(field_1, field_2), i|
            type_1 = field_1.local_column_options[:type]
            type_2 = field_2.local_column_options[:type]
            if type_1 != type_2
              # FIXME: comparing fields of different types is unsupported.
              raise "Cannot compare fields #{field_1.name_sym.inspect} (#{type_1}) and " +
                "#{field_2.name_sym.inspect} (#{type_2}): column types differ"
            end
            @group_value_fields.push([field_1.name_sym, field_2.name_sym])
            groups_columns.push({:name => :"pair_#{i}", :type => type_1})
          end
          # Captured before the count columns are added: only the id and
          # pair columns are written through the groups buffer.
          @groups_column_names = groups_columns.collect { |c| c[:name] }
          @groups_table_name = :"groups_#{@run_number}"

          # Add extra columns to the groups table for summary stats
          @resources.length.times do |i|
            groups_columns << {:name => :"resource_#{i+1}_count", :type => Integer}
          end

          # Fall back to String when the resources' key types differ.
          key_types = @resources.collect { |r| r.primary_key_type }.uniq
          record_id_type = key_types.length == 1 ? key_types[0] : String
          @join_table_name = :"groups_records_#{@run_number}"

          @parent.local_database do |scenario_db|
            scenario_db.create_table!(@groups_table_name) do
              columns.push(*groups_columns)
            end
            scenario_db.create_table!(@join_table_name) do
              column :record_id, record_id_type
              Integer :which
              Integer :group_id, :index => true
              index [:group_id, :which, :record_id] # speedy filtering when showing results
            end
            if @type != 'self-linkage'
              # Need another groups table
              @secondary_groups_table_name = :"groups_groups_#{@run_number}"
              scenario_db.create_table!(@secondary_groups_table_name) do
                Integer :group_1_id
                Integer :group_2_id
              end
            end
          end
        end

        # Starts and returns the thread that groups the records of one
        # resource.
        #
        # dataset     - the resource's dataset
        # primary_key - primary key column (symbol) of the resource
        # which       - nil for self-linkage, otherwise the index (0 or 1)
        #               of the resource within the scenario
        #
        # The dataset is split into segments of LIMIT rows, each handled by
        # its own thread. Each segment thread publishes its first row
        # (:head) and last row (:tail) so that groups spanning a segment
        # boundary can be joined once all segments are done.
        def phase_one_thread(dataset, primary_key, which)
          # The outer and inner threads deliberately use different local
          # names: a shared local would be reassigned from inside the outer
          # thread's block, and this method could return a segment thread.
          resource_thread = Thread.new do
            # Apply filters and what not from comparisons
            dataset = dataset.select(primary_key)
            @matcher.comparisons.each do |comparison|
              dataset = comparison.apply(dataset, which)
            end
            dataset = dataset.order_more(primary_key)

            # Do the work
            local_tw = ThreadsWait.new
            segment_threads = []
            count = dataset.count
            segments = count / LIMIT
            segments += 1 if count % LIMIT > 0

            segments.times do |segment_num|
              ds = dataset.limit(LIMIT, LIMIT * segment_num)
              segment_thread = Thread.new do
                prev_row = nil
                group_id = nil

                ds.each_with_index do |row, row_num|
                  if row_num > 0
                    result = compare_rows(prev_row, row, which)
                    if (result || which) && group_id.nil?
                      # If `which` is not nil, that means we're in the first
                      # stage of a dual or cross linkage. So, we should save
                      # groups that only have 1 record in them.
                      group_id = create_group(prev_row, which)
                      @join_buffer.add([prev_row[primary_key], which, group_id])
                    end

                    # This stores the first record of this segment in order
                    # to check it against the last record of the previous
                    # segment. It has to happen before group_id is reset
                    # below, while it still refers to prev_row's group.
                    if segment_num > 0 && row_num == 1
                      Thread.current[:head] = { :row => prev_row, :group_id => group_id }
                    end

                    if result
                      @join_buffer.add([row[primary_key], which, group_id])
                    else
                      group_id = nil
                    end
                  end
                  prev_row = row
                end
                if which && group_id.nil? && prev_row
                  # See above comment about `which`
                  group_id = create_group(prev_row, which)
                  @join_buffer.add([prev_row[primary_key], which, group_id])
                end

                # A segment with a single row never reaches row_num == 1,
                # so its only row is published as the head here.
                if segment_num > 0 && prev_row && Thread.current[:head].nil?
                  Thread.current[:head] = { :row => prev_row, :group_id => group_id }
                end

                # This stores the last record of this segment in order to
                # check it against the first record of the next segment.
                if segment_num < (segments - 1) && prev_row
                  Thread.current[:tail] = { :row => prev_row, :group_id => group_id }
                end
              end
              segment_thread.abort_on_exception = true
              segment_threads << segment_thread
              local_tw.join_nowait(segment_thread)
              local_tw.next_wait if local_tw.threads.length == MAX_SEGMENT_THREADS
            end
            local_tw.all_waits
            @groups_buffer.flush
            @join_buffer.flush

            # Compare heads and tails from segments
            if segments > 1
              merge_segment_boundaries(segment_threads, primary_key, which)
              @groups_buffer.flush
              @join_buffer.flush
            end
          end
          resource_thread.abort_on_exception = true
          resource_thread
        end

        # Joins groups that were split by a segment boundary in phase one.
        #
        # segments    - finished segment threads in segment order (anything
        #               answering [:head] / [:tail] with
        #               { :row => row, :group_id => id-or-nil } or nil)
        # primary_key - primary key column (symbol) of the resource
        # which       - see phase_one_thread
        #
        # For each pair of neighbouring segments the tail row of the first
        # is compared with the head row of the second. On a match:
        #   * both rows grouped   -> the head's group is folded into the
        #                            tail's group
        #   * only one row grouped-> the other row joins that group
        #   * neither row grouped -> a new group holding both is created
        # Rows added through the buffers are not flushed here.
        def merge_segment_boundaries(segments, primary_key, which)
          merged = {} # absorbed group id => surviving group id
          tail = nil
          segments.each do |segment|
            head = segment[:head]
            if tail && head && compare_rows(tail[:row], head[:row], which)
              tail_group_id = tail[:group_id]
              head_group_id = head[:group_id]
              if tail_group_id && head_group_id
                @join_dataset.filter({
                  :group_id => head_group_id
                }).update(:group_id => tail_group_id)
                @groups_dataset.filter({
                  :id => head_group_id
                }).delete
                merged[head_group_id] = tail_group_id
              elsif tail_group_id
                @join_buffer.add([head[:row][primary_key], which, tail_group_id])
              elsif head_group_id
                @join_buffer.add([tail[:row][primary_key], which, head_group_id])
              else
                new_group_id = create_group(tail[:row], which)
                @join_buffer.add([tail[:row][primary_key], which, new_group_id])
                @join_buffer.add([head[:row][primary_key], which, new_group_id])
              end
            end

            tail = segment[:tail]
            # When one group spans this whole segment, its tail still
            # carries the id of a group that was just absorbed.
            if tail && merged.has_key?(tail[:group_id])
              tail = tail.merge(:group_id => merged[tail[:group_id]])
            end
          end
        end

        # Finds pairs of groups with equal values and records each pair's
        # ids through @join_buffer.
        #
        # dataset     - the groups dataset
        # primary_key - id column (symbol) of the groups table
        #
        # Works like phase one (segments of LIMIT rows, neighbouring rows
        # compared), except that a row that matched is not compared again.
        # Pairs found across segment boundaries are added to the buffer
        # after the last flush done here; the caller flushes them.
        def phase_two(dataset, primary_key)
          # Add sorting only
          @pairs.each do |fields|
            dataset = dataset.order_more(*fields.uniq)
          end
          dataset = dataset.order_more(primary_key)

          local_tw = ThreadsWait.new
          segment_threads = []
          count = dataset.count
          segments = count / LIMIT
          segments += 1 if count % LIMIT > 0

          segments.times do |segment_num|
            ds = dataset.limit(LIMIT, LIMIT * segment_num)
            segment_thread = Thread.new do
              prev_result = nil
              prev_row = nil
              skip = false
              rows_seen = 0

              ds.each_with_index do |row, row_num|
                rows_seen += 1
                result = nil
                if row_num > 0 && !skip
                  result = compare_rows(prev_row, row)
                  # There should only be one-to-one matches here.
                  if result
                    @join_buffer.add([prev_row[primary_key], row[primary_key]])
                    skip = true # Skip comparing this row to the next
                  end

                  # The first row is only a candidate for the previous
                  # segment's tail when it is still unmatched.
                  if segment_num > 0 && row_num == 1 && !result
                    Thread.current[:head] = { :row => prev_row }
                  end
                elsif skip
                  skip = false
                end
                prev_row = row
                prev_result = result
              end

              # A single-row segment never reaches row_num == 1; its only
              # row is unmatched and therefore a valid head.
              if segment_num > 0 && rows_seen == 1
                Thread.current[:head] = { :row => prev_row }
              end

              if segment_num < (segments - 1) && !prev_result && prev_row
                Thread.current[:tail] = { :row => prev_row }
              end
            end
            segment_thread.abort_on_exception = true
            segment_threads << segment_thread
            local_tw.join_nowait(segment_thread)
            local_tw.next_wait if local_tw.threads.length == MAX_SEGMENT_THREADS
          end
          local_tw.all_waits
          @join_buffer.flush

          # Compare heads and tails from segments
          if segment_threads.length > 1
            tail = nil
            segment_threads.each do |finished|
              if tail
                head = finished[:head]
                if head && compare_rows(tail[:row], head[:row])
                  @join_buffer.add([tail[:row][primary_key], head[:row][primary_key]])
                end
              end
              tail = finished[:tail]
            end
          end
        end

        # Compares two rows on every field pair in @pairs.
        #
        # which - when nil, row_1 is read with the first field of each pair
        #         and row_2 with the second; otherwise both rows are read
        #         with the field at index `which`.
        #
        # Returns the array of shared values when all pairs are equal
        # (an empty array when @pairs is empty), nil otherwise.
        def compare_rows(row_1, row_2, which = nil)
          values = []
          @pairs.each do |fields|
            value_1 = row_1[fields[which || 0]]
            value_2 = row_2[fields[which || 1]]
            if value_1 == value_2
              values << value_1
            else
              values = nil
              break
            end
          end
          values
        end

        # Queues a new group row (id followed by the row's value for each
        # compared field) on the groups buffer and returns the new id.
        def create_group(row, which)
          group_id = get_next_group_id
          group_row = [group_id]
          @group_value_fields.each do |fields|
            group_row.push(row[fields[which || 0]])
          end
          @groups_buffer.add(group_row)
          group_id
        end

        # Returns the next unused group id; guarded by a mutex because it
        # is called from several threads.
        def get_next_group_id
          @mutex.synchronize { @group_number += 1 }
        end
      end
    end
  end
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module Coupler
  module Models
    # A linkage scenario: one or two resources of a project plus the
    # matcher used to link their records.
    class Scenario < Sequel::Model
      # Raised by #run! when the scenario has no matcher.
      class NoMatcherError < StandardError; end

      # Raised by #run! when one of the resources is out of date.
      class ResourcesOutOfDateError < StandardError; end

      include CommonModel
      include Jobify

      # Ids of the resources to attach; resolved into resource_1_id and
      # resource_2_id before validation.
      attr_writer :resource_ids
      many_to_one :project
      many_to_one :resource_1, :class => "Coupler::Models::Resource"
      many_to_one :resource_2, :class => "Coupler::Models::Resource"
      one_to_one :matcher
      one_to_many :results

      # Returns a hash of project_id => number of scenarios.
      def self.count_by_project
        dataset.naked.group_and_count(:project_id).to_hash(:project_id, :count)
      end

      # Returns "no_matcher", "resources_out_of_date" or "ok".
      def status
        if matcher.nil?
          "no_matcher"
        elsif resources.any? { |r| r.status == "out_of_date" }
          "resources_out_of_date"
        else
          "ok"
        end
      end

      # Returns the attached resources as an array of zero, one or two
      # elements (resource_2 is ignored when resource_1 is missing).
      def resources
        if resource_1
          resource_2 ? [resource_1, resource_2] : [resource_1]
        else
          []
        end
      end

      # Connects to this scenario's own database and passes the connection
      # to the block.
      def local_database(&block)
        Sequel.connect(local_connection_string, {
          :loggers => [Coupler::Logger.instance],
          :max_connections => 50,
          :pool_timeout => 60
        }, &block)
      end

      # Runs the scenario, bumps run_count / last_run_at and saves a Result
      # for the new run.
      #
      # Raises NoMatcherError or ResourcesOutOfDateError when the scenario
      # is not in a runnable state (see #status).
      def run!(&progress)
        case status
        when 'no_matcher'            then raise NoMatcherError
        when 'resources_out_of_date' then raise ResourcesOutOfDateError
        end

        runner = Runner.new(self, &progress)
        runner.run!

        update(:run_count => run_count + 1, :last_run_at => Time.now)
        result = Result.new(:scenario => self, :run_number => run_count)
        result.save
      end

      # Derives linkage_type from the attached resources and the matcher:
      # two resources -> "dual-linkage"; one resource with a cross-matching
      # matcher -> "cross-linkage"; one resource otherwise ->
      # "self-linkage"; no resource -> "N/A".
      def set_linkage_type
        if resource_1
          if resource_2
            self.linkage_type = "dual-linkage"
          elsif matcher && matcher.cross_match?
            self.linkage_type = "cross-linkage"
          else
            self.linkage_type = "self-linkage"
          end
        else
          self.linkage_type = "N/A"
        end
      end

      private

      def local_connection_string
        Base.connection_string("scenario_#{id}")
      end

      # Resolves @resource_ids (at most the first two) into resource_1_id
      # and resource_2_id. Only resources belonging to the scenario's
      # project are accepted, and they are assigned in the order the ids
      # were given rather than in whatever order the database returns them.
      def before_validation
        super
        # Without a project nothing can be looked up; the ids stay unset
        # and validation reports the missing resource.
        if @resource_ids.is_a?(Array) && project
          wanted_ids = @resource_ids[0..1].compact
          found = project.resources_dataset.filter(:id => wanted_ids).all

          objects = []
          wanted_ids.each do |wanted_id|
            # Compare as strings: ids may arrive as strings or integers.
            match = found.detect { |r| r.id.to_s == wanted_id.to_s }
            objects << match if match && !objects.include?(match)
          end

          self.resource_1_id = objects[0].nil? ? nil : objects[0].id
          self.resource_2_id = objects[1].nil? ? nil : objects[1].id
        end
      end

      def validate
        super
        validates_presence :name
        validates_unique [:name, :project_id]
        if resource_1_id.nil?
          errors.add(:base, "At least one resource is required")
        end
      end

      def before_create
        super
        self.slug ||= name.downcase.gsub(/\s+/, "_")
        set_linkage_type
      end

      # There is currently no plan to let a user change resources
      # after a scenario has been created.
      #def before_update
        #super
        #set_linkage_type
      #end
    end
  end
end
|
114
|
+
|
115
|
+
require File.join(File.dirname(__FILE__), 'scenario', 'runner')
|
@@ -0,0 +1,117 @@
|
|
1
|
+
module Coupler
  module Models
    # One step in a resource's transformation stack: applies a transformer
    # to a source field and stores the outcome in a result field (either
    # the source field itself or a newly generated field).
    class Transformation < Sequel::Model
      include CommonModel
      many_to_one :resource
      many_to_one :source_field, :class => Field
      many_to_one :result_field, :class => Field
      many_to_one :transformer

      plugin :nested_attributes
      nested_attributes :result_field
      nested_attributes :transformer, :destroy => false

      # Runs the transformer on `data`, reading the source field's column
      # and writing the result field's column.
      def transform(data)
        transformer.transform(data, {
          :in  => source_field.name.to_sym,
          :out => result_field.name.to_sym
        })
      end

      # Returns the changes the transformer makes to the source field.
      def field_changes
        transformer.field_changes(source_field)
      end

      # NOTE: The fact that the aliased name doesn't have an = at the end is
      # important.  Ruby methods with names that have = at the end always
      # return the RHS value, regardless of what the method actually returns.
      # The only way to grab the associated object that gets created from the
      # nested attributes methods is by fetching the return value.
      #
      alias :original_result_field_attributes :result_field_attributes=
      def result_field_attributes=(h)
        @staged_result_field = self.original_result_field_attributes(h.merge({
          :is_generated => true
        }))
      end

      # True when this is the last transformation of its resource and its
      # result field is absent, not generated, or not used by any scenario.
      def deletable?
        # Positions are assigned per resource (see before_create), so the
        # maximum has to be taken over this resource's transformations only.
        last_position = self.class.filter(:resource_id => resource_id).max(:position)
        position == last_position &&
          (result_field.nil? || !result_field.is_generated || result_field.scenarios_dataset.count == 0)
      end

      private

      # Defaults the result field to the source field when no new result
      # field was staged; otherwise fills in the staged field's type
      # information and resource.
      def before_validation
        super
        if source_field_id && !result_field_id && !@staged_result_field
          self.result_field_id = source_field_id
        end

        if @staged_result_field && transformer && source_field && resource_id
          # Work on a copy so the transformer's own hash is left untouched;
          # an empty result means "no changes".
          hash = (transformer.field_changes(source_field).values[0] || {}).dup
          if hash.empty?
            hash.update({
              :type => source_field[:type],
              :db_type => source_field[:db_type]
            })
          end
          hash[:resource_id] = resource_id
          @staged_result_field.set(hash)
        end
      end

      def validate
        super
        validates_presence [:resource_id, :source_field_id]
        if transformer.nil?
          errors.add(:transformer_id, "is not present")
        end
        if errors.empty? && resource.nil?
          # resource_id is set but points at nothing
          errors.add(:resource_id, "is invalid")
        end
        if errors.empty?
          # Both fields must belong to this transformation's resource.
          source_field = resource.fields_dataset[:id => source_field_id]
          if source_field.nil?
            errors.add(:source_field_id, "is invalid")
          else
            if transformer.allowed_types.is_a?(Array) && !transformer.allowed_types.include?(source_field.final_type)
              errors.add(:base, "#{transformer.name} cannot transform type '#{source_field.final_type}'")
            end
          end

          if !@staged_result_field
            result_field = result_field_id ? resource.fields_dataset[:id => result_field_id] : nil
            if result_field.nil?
              errors.add(:result_field_id, "is invalid")
            end
          end
        end
      end

      # Appends the transformation to the end of its resource's stack
      # unless a position was given.
      def before_create
        super
        self.position ||= self.class.filter(:resource_id => resource_id).count + 1
      end

      def after_save
        super
        resource.refresh_fields!
      end

      def before_destroy
        # Prevent all but the last transformation from being destroyed
        #
        # TODO: I'm putting off programming the complex logic required to
        # enable deletion from the middle of a transformation stack.
        #
        return false if super == false
        deletable?
      end

      # Destroys the generated result field once no transformation uses it
      # any more, then refreshes the resource's fields.
      def after_destroy
        super
        if result_field && result_field.is_generated && self.class.filter(:result_field_id => result_field.id).count == 0
          result_field.destroy
        end
        resource.refresh_fields! if resource
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Coupler
  module Models
    class Transformer
      # Evaluates a transformer's code against a single input value.
      #
      # Every public instance method except instance_eval and the __xxx__
      # ones is undefined, so the evaluated code sees `value` and little
      # else of this object's public interface.
      #
      # NOTE(review): this is not a security sandbox. Private methods are
      # not removed (instance_methods does not list them), so Kernel
      # methods such as `system`, `eval` or `require` remain callable from
      # the evaluated code. Only run trusted code here.
      class Runner
        instance_methods.each do |m|
          undef_method m unless m =~ /^__|^instance_eval$/
        end

        # code  - String of Ruby code to evaluate
        # input - the value exposed to the code as `value`
        def initialize(code, input)
          @input = input
          @code = code
        end

        # Evaluates the code in the context of this object and returns the
        # value of its last expression. Exceptions raised by the code
        # propagate to the caller.
        def run
          instance_eval(@code, __FILE__, __LINE__)
        end

        # The input value.
        def value
          @input
        end

        # Any other method is reported as undefined. Arguments and a block
        # are accepted so that a call with arguments also ends in
        # NoMethodError instead of ArgumentError.
        def method_missing(name, *args, &block)
          raise NoMethodError.new("undefined method `#{name}' for transformer code", name)
        end
      end
    end
  end
end
|