coupler 0.0.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rvmrc +1 -0
- data/.vimrc +40 -0
- data/Gemfile +27 -0
- data/Gemfile.lock +71 -0
- data/LICENSE +20 -0
- data/NOTES +6 -0
- data/README.rdoc +18 -0
- data/Rakefile +42 -0
- data/TODO +11 -0
- data/VERSION +1 -0
- data/bin/coupler +7 -0
- data/db/.gitignore +6 -0
- data/db/migrate/001_initial_schema.rb +166 -0
- data/db/migrate/002_stub.rb +4 -0
- data/db/migrate/003_stub.rb +4 -0
- data/db/migrate/004_create_comparisons.rb +28 -0
- data/db/migrate/005_move_database_name.rb +19 -0
- data/db/migrate/006_upgrade_comparisons.rb +34 -0
- data/db/migrate/007_add_which_to_comparisons.rb +23 -0
- data/db/migrate/008_add_result_field_to_transformations.rb +33 -0
- data/db/migrate/009_add_generated_flag_to_fields.rb +13 -0
- data/db/migrate/010_create_imports.rb +24 -0
- data/db/migrate/011_add_primary_key_type.rb +13 -0
- data/db/migrate/012_add_transformed_with_to_resources.rb +13 -0
- data/db/migrate/013_add_run_count_to_scenarios.rb +13 -0
- data/db/migrate/014_add_last_accessed_at_to_some_tables.rb +13 -0
- data/db/migrate/015_add_run_number_to_results.rb +15 -0
- data/db/migrate/016_fix_scenario_run_count.rb +27 -0
- data/db/migrate/017_rename_comparison_columns.rb +14 -0
- data/db/migrate/018_fix_scenario_linkage_type.rb +8 -0
- data/db/migrate/019_add_columns_to_imports.rb +24 -0
- data/db/migrate/020_rename_import_columns.rb +12 -0
- data/db/migrate/021_add_fields_to_connections.rb +15 -0
- data/db/migrate/022_remove_database_name_from_resources.rb +11 -0
- data/features/connections.feature +28 -0
- data/features/matchers.feature +35 -0
- data/features/projects.feature +11 -0
- data/features/resources.feature +62 -0
- data/features/scenarios.feature +45 -0
- data/features/step_definitions/coupler_steps.rb +145 -0
- data/features/step_definitions/matchers_steps.rb +26 -0
- data/features/step_definitions/resources_steps.rb +12 -0
- data/features/step_definitions/scenarios_steps.rb +7 -0
- data/features/step_definitions/transformations_steps.rb +3 -0
- data/features/support/env.rb +128 -0
- data/features/transformations.feature +22 -0
- data/features/wizard.feature +10 -0
- data/gfx/coupler-header.svg +213 -0
- data/gfx/coupler-sidebar.svg +656 -0
- data/gfx/coupler.svg +184 -0
- data/gfx/icon.svg +75 -0
- data/lib/coupler/base.rb +63 -0
- data/lib/coupler/config.rb +128 -0
- data/lib/coupler/data_uploader.rb +20 -0
- data/lib/coupler/database.rb +31 -0
- data/lib/coupler/extensions/connections.rb +57 -0
- data/lib/coupler/extensions/exceptions.rb +58 -0
- data/lib/coupler/extensions/imports.rb +43 -0
- data/lib/coupler/extensions/jobs.rb +21 -0
- data/lib/coupler/extensions/matchers.rb +64 -0
- data/lib/coupler/extensions/projects.rb +62 -0
- data/lib/coupler/extensions/resources.rb +89 -0
- data/lib/coupler/extensions/results.rb +100 -0
- data/lib/coupler/extensions/scenarios.rb +50 -0
- data/lib/coupler/extensions/transformations.rb +70 -0
- data/lib/coupler/extensions/transformers.rb +58 -0
- data/lib/coupler/extensions.rb +16 -0
- data/lib/coupler/helpers.rb +121 -0
- data/lib/coupler/import_buffer.rb +48 -0
- data/lib/coupler/logger.rb +16 -0
- data/lib/coupler/models/common_model.rb +104 -0
- data/lib/coupler/models/comparison.rb +166 -0
- data/lib/coupler/models/connection.rb +59 -0
- data/lib/coupler/models/field.rb +55 -0
- data/lib/coupler/models/import.rb +238 -0
- data/lib/coupler/models/job.rb +42 -0
- data/lib/coupler/models/jobify.rb +17 -0
- data/lib/coupler/models/matcher.rb +36 -0
- data/lib/coupler/models/project.rb +40 -0
- data/lib/coupler/models/resource.rb +287 -0
- data/lib/coupler/models/result.rb +92 -0
- data/lib/coupler/models/scenario/runner.rb +357 -0
- data/lib/coupler/models/scenario.rb +115 -0
- data/lib/coupler/models/transformation.rb +117 -0
- data/lib/coupler/models/transformer/runner.rb +28 -0
- data/lib/coupler/models/transformer.rb +110 -0
- data/lib/coupler/models.rb +30 -0
- data/lib/coupler/runner.rb +76 -0
- data/lib/coupler/scheduler.rb +56 -0
- data/lib/coupler.rb +34 -0
- data/log/.gitignore +1 -0
- data/misc/README +5 -0
- data/misc/jruby-json.license +57 -0
- data/misc/rack-flash.license +22 -0
- data/script/dbconsole.rb +5 -0
- data/src/edu/vanderbilt/coupler/Main.java +116 -0
- data/src/edu/vanderbilt/coupler/jruby.properties +1 -0
- data/tasks/annotations.rake +84 -0
- data/tasks/db.rake +120 -0
- data/tasks/environment.rake +12 -0
- data/tasks/jeweler.rake +43 -0
- data/tasks/package.rake +58 -0
- data/tasks/rdoc.rake +13 -0
- data/tasks/test.rake +63 -0
- data/tasks/vendor.rake +43 -0
- data/test/README.txt +6 -0
- data/test/config.yml +9 -0
- data/test/coupler/models/test_import.rb +221 -0
- data/test/factories.rb +91 -0
- data/test/fixtures/duplicate-keys.csv +5 -0
- data/test/fixtures/no-headers.csv +50 -0
- data/test/fixtures/people.csv +51 -0
- data/test/fixtures/varying-row-size.csv +4 -0
- data/test/helper.rb +156 -0
- data/test/integration/extensions/test_connections.rb +80 -0
- data/test/integration/extensions/test_imports.rb +94 -0
- data/test/integration/extensions/test_jobs.rb +52 -0
- data/test/integration/extensions/test_matchers.rb +134 -0
- data/test/integration/extensions/test_projects.rb +82 -0
- data/test/integration/extensions/test_resources.rb +150 -0
- data/test/integration/extensions/test_results.rb +89 -0
- data/test/integration/extensions/test_scenarios.rb +88 -0
- data/test/integration/extensions/test_transformations.rb +113 -0
- data/test/integration/extensions/test_transformers.rb +80 -0
- data/test/integration/test_field.rb +45 -0
- data/test/integration/test_import.rb +78 -0
- data/test/integration/test_running_scenarios.rb +379 -0
- data/test/integration/test_transformation.rb +56 -0
- data/test/integration/test_transforming.rb +154 -0
- data/test/table_sets.rb +76 -0
- data/test/unit/models/test_common_model.rb +130 -0
- data/test/unit/models/test_comparison.rb +619 -0
- data/test/unit/models/test_connection.rb +115 -0
- data/test/unit/models/test_field.rb +99 -0
- data/test/unit/models/test_import.rb +130 -0
- data/test/unit/models/test_job.rb +115 -0
- data/test/unit/models/test_matcher.rb +82 -0
- data/test/unit/models/test_project.rb +102 -0
- data/test/unit/models/test_resource.rb +564 -0
- data/test/unit/models/test_result.rb +90 -0
- data/test/unit/models/test_scenario.rb +199 -0
- data/test/unit/models/test_transformation.rb +193 -0
- data/test/unit/models/test_transformer.rb +188 -0
- data/test/unit/test_base.rb +60 -0
- data/test/unit/test_data_uploader.rb +27 -0
- data/test/unit/test_database.rb +23 -0
- data/test/unit/test_helpers.rb +58 -0
- data/test/unit/test_logger.rb +10 -0
- data/test/unit/test_models.rb +12 -0
- data/test/unit/test_runner.rb +76 -0
- data/test/unit/test_scheduler.rb +66 -0
- data/uploads/.gitignore +2 -0
- data/vendor/java/.gitignore +5 -0
- data/webroot/public/css/960.css +1 -0
- data/webroot/public/css/dataTables.css +1057 -0
- data/webroot/public/css/jquery-ui.css +572 -0
- data/webroot/public/css/jquery.treeview.css +68 -0
- data/webroot/public/css/reset.css +1 -0
- data/webroot/public/css/style.css +504 -0
- data/webroot/public/css/text.css +1 -0
- data/webroot/public/favicon.ico +0 -0
- data/webroot/public/images/12_col.gif +0 -0
- data/webroot/public/images/16_col.gif +0 -0
- data/webroot/public/images/add.png +0 -0
- data/webroot/public/images/ajax-loader.gif +0 -0
- data/webroot/public/images/cog.png +0 -0
- data/webroot/public/images/coupler.png +0 -0
- data/webroot/public/images/foo.png +0 -0
- data/webroot/public/images/hammer.png +0 -0
- data/webroot/public/images/header.png +0 -0
- data/webroot/public/images/home.gif +0 -0
- data/webroot/public/images/jobs.gif +0 -0
- data/webroot/public/images/sidebar-bottom.png +0 -0
- data/webroot/public/images/sidebar.png +0 -0
- data/webroot/public/images/treeview-default-line.gif +0 -0
- data/webroot/public/images/treeview-default.gif +0 -0
- data/webroot/public/images/ui-anim_basic_16x16.gif +0 -0
- data/webroot/public/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/webroot/public/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/webroot/public/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/webroot/public/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/webroot/public/images/ui-bg_highlight-hard_30_565356_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-hard_75_888588_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-soft_30_6e3b3a_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-soft_35_8e8b8e_1x100.png +0 -0
- data/webroot/public/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/webroot/public/images/ui-icons_222222_256x240.png +0 -0
- data/webroot/public/images/ui-icons_2e83ff_256x240.png +0 -0
- data/webroot/public/images/ui-icons_454545_256x240.png +0 -0
- data/webroot/public/images/ui-icons_888888_256x240.png +0 -0
- data/webroot/public/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/webroot/public/images/ui-icons_ffffff_256x240.png +0 -0
- data/webroot/public/js/ajaxupload.js +673 -0
- data/webroot/public/js/application.js +40 -0
- data/webroot/public/js/jquery-ui.combobox.js +98 -0
- data/webroot/public/js/jquery-ui.js +9867 -0
- data/webroot/public/js/jquery-ui.min.js +559 -0
- data/webroot/public/js/jquery.dataTables.min.js +587 -0
- data/webroot/public/js/jquery.min.js +154 -0
- data/webroot/public/js/jquery.timeago.js +140 -0
- data/webroot/public/js/jquery.tooltip.min.js +19 -0
- data/webroot/public/js/jquery.treeview.min.js +15 -0
- data/webroot/public/js/resource.js +11 -0
- data/webroot/public/js/results.js +56 -0
- data/webroot/public/js/transformations.js +95 -0
- data/webroot/views/connections/index.erb +5 -0
- data/webroot/views/connections/list.erb +34 -0
- data/webroot/views/connections/new.erb +55 -0
- data/webroot/views/connections/show.erb +36 -0
- data/webroot/views/imports/edit.erb +60 -0
- data/webroot/views/imports/form.erb +81 -0
- data/webroot/views/imports/new.erb +89 -0
- data/webroot/views/index.erb +12 -0
- data/webroot/views/jobs/index.erb +7 -0
- data/webroot/views/jobs/list.erb +24 -0
- data/webroot/views/layout.erb +38 -0
- data/webroot/views/matchers/form.erb +250 -0
- data/webroot/views/matchers/list.erb +32 -0
- data/webroot/views/projects/form.erb +14 -0
- data/webroot/views/projects/index.erb +96 -0
- data/webroot/views/projects/show.erb +24 -0
- data/webroot/views/resources/edit.erb +88 -0
- data/webroot/views/resources/index.erb +5 -0
- data/webroot/views/resources/list.erb +27 -0
- data/webroot/views/resources/new.erb +121 -0
- data/webroot/views/resources/show.erb +86 -0
- data/webroot/views/resources/transform.erb +2 -0
- data/webroot/views/results/csv.erb +12 -0
- data/webroot/views/results/details.erb +15 -0
- data/webroot/views/results/index.erb +2 -0
- data/webroot/views/results/list.erb +22 -0
- data/webroot/views/results/record.erb +24 -0
- data/webroot/views/results/show.erb +68 -0
- data/webroot/views/scenarios/index.erb +5 -0
- data/webroot/views/scenarios/list.erb +20 -0
- data/webroot/views/scenarios/new.erb +99 -0
- data/webroot/views/scenarios/run.erb +2 -0
- data/webroot/views/scenarios/show.erb +50 -0
- data/webroot/views/sidebar.erb +106 -0
- data/webroot/views/transformations/create.erb +115 -0
- data/webroot/views/transformations/for.erb +16 -0
- data/webroot/views/transformations/index.erb +2 -0
- data/webroot/views/transformations/list.erb +29 -0
- data/webroot/views/transformations/new.erb +126 -0
- data/webroot/views/transformations/preview.erb +46 -0
- data/webroot/views/transformers/edit.erb +6 -0
- data/webroot/views/transformers/form.erb +58 -0
- data/webroot/views/transformers/index.erb +2 -0
- data/webroot/views/transformers/list.erb +25 -0
- data/webroot/views/transformers/new.erb +5 -0
- data/webroot/views/transformers/preview.erb +23 -0
- data/webroot/views/transformers/show.erb +0 -0
- metadata +558 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
module Coupler
|
|
2
|
+
module Models
|
|
3
|
+
class Scenario
|
|
4
|
+
class Runner
|
|
5
|
+
LIMIT = 10000
|
|
6
|
+
|
|
7
|
+
# Prepares one run of a scenario.
#
# Caches the scenario's matcher, linkage type and resources, works out
# the number of the upcoming run, then builds the comparison field pairs
# and (re)creates the result tables for that run.
#
# parent   - the scenario to run; this method reads its matcher,
#            linkage_type, resource_1, resources and run_count.
# progress - optional block, kept in @progress (this class never calls
#            it in the code visible here).
def initialize(parent, &progress)
  @parent   = parent
  @progress = progress
  @matcher  = parent.matcher
  @type     = parent.linkage_type

  # A cross-linkage compares one resource against itself, so the same
  # resource is listed twice and handled like a dual-linkage.
  @resources =
    if @type == 'cross-linkage'
      [parent.resource_1, parent.resource_1]
    else
      parent.resources
    end

  @run_number   = @parent.run_count + 1
  @mutex        = Mutex.new   # guards @group_number (see get_next_group_id)
  @group_number = 0

  setup_pairs
  create_tables
end
|
|
23
|
+
|
|
24
|
+
# Executes the linkage and fills the groups / groups_records tables
# created by create_tables.
#
# Phase one groups the records of every resource (one thread per
# resource). For a self-linkage that is the whole job. For every other
# linkage type a second phase pairs up groups from both sides, merges
# each pair under a fresh group id, and deletes whatever was left
# unpaired. Finally per-resource record counts are written to each group.
def run!
  @parent.local_database do |scenario_db|
    @groups_dataset = scenario_db[@groups_table_name]
    @groups_buffer  = ImportBuffer.new(@groups_column_names, @groups_dataset)
    @join_dataset   = scenario_db[@join_table_name]
    @join_buffer    = ImportBuffer.new([:record_id, :which, :group_id], @join_dataset)

    # ---- Phase one: group the records inside each resource ----
    @pairs = @phase_one_pairs
    waiter = ThreadsWait.new
    opened_databases = []
    @resources.each_with_index do |resource, index|
      source = resource.final_dataset
      opened_databases << source.db
      key = resource.primary_key_sym
      # `which` stays nil for a self-linkage; otherwise it is the
      # position (0 or 1) of the resource being grouped.
      which = (@type == 'self-linkage') ? nil : index
      waiter.join_nowait(phase_one_thread(source, key, which))
    end
    waiter.all_waits

    # Tidy up the connections that were opened for the resources.
    opened_databases.each do |db|
      db.disconnect
      ::Sequel::DATABASES.delete(db)
    end

    unless @type == 'self-linkage'
      # ---- Phase two: match groups against each other ----
      pairings = scenario_db[@secondary_groups_table_name].order(:group_1_id, :group_2_id)
      @join_buffer = ImportBuffer.new([:group_1_id, :group_2_id], pairings)
      @pairs = @phase_two_pairs
      phase_two(@groups_dataset, :id)
      @join_buffer.flush

      # Merge every matched pair of groups under a newly issued id,
      # ten pairings per batch, one thread per pairing.
      waiter = ThreadsWait.new
      total = pairings.count
      # Every id issued up to this point belongs to a phase-one group.
      last_group_id = @group_number
      (0...total).step(10) do |offset|
        pairings.limit(10, offset).each do |row|
          merger = Thread.new(row[:group_1_id], row[:group_2_id]) do |group_1_id, group_2_id|
            merged_id = get_next_group_id
            @join_dataset.filter(:group_id => [group_1_id, group_2_id]).update(:group_id => merged_id)
            @groups_dataset.filter(:id => group_1_id).update(:id => merged_id)
            @groups_dataset.filter(:id => group_2_id).delete
          end
          merger.abort_on_exception = true
          waiter.join_nowait(merger)
        end
        waiter.all_waits
      end

      # Groups (and their records) that still carry a phase-one id were
      # never merged, i.e. they had no counterpart; drop them.
      @groups_dataset.filter(:id <= last_group_id).delete
      @join_dataset.filter(:group_id <= last_group_id).delete

      # The pairing table has served its purpose.
      scenario_db.drop_table(@secondary_groups_table_name)
    end

    # ---- Summary stats: records per group and resource ----
    @join_dataset.group_and_count(:group_id, :which).each do |row|
      resource_number = row[:which] ? row[:which] + 1 : 1
      count_column = :"resource_#{resource_number}_count"
      @groups_dataset.filter(:id => row[:group_id]).update(count_column => row[:count])
    end
  end
end
|
|
99
|
+
|
|
100
|
+
private
|
|
101
|
+
# Collects the field pairs of every non-blocking comparison of the
# matcher.
#
# Sets:
#   @field_pairs     - the field objects of each such comparison
#   @phase_one_pairs - the same pairs as column-name symbols
#   @phase_two_pairs - nil for a self-linkage; otherwise one
#                      [:pair_N, :pair_N] entry per comparison, naming
#                      the groups-table column compared in phase two
def setup_pairs
  @field_pairs     = []
  @phase_one_pairs = []
  @phase_two_pairs = (@type == 'self-linkage') ? nil : []

  @matcher.comparisons.each do |comparison|
    next if comparison.blocking?

    pair = comparison.fields
    @field_pairs << pair
    @phase_one_pairs.push(pair.collect(&:name_sym))
    next unless @phase_two_pairs

    column = :"pair_#{@phase_two_pairs.length}"
    @phase_two_pairs.push([column, column])
  end
end
|
|
117
|
+
|
|
118
|
+
# Creates (dropping any previous version of) the tables for this run in
# the scenario's local database:
#
#   groups_<run>          - id, one pair_N column per compared field
#                           pair, and one resource_N_count column per
#                           resource
#   groups_records_<run>  - record_id / which / group_id join table
#   groups_groups_<run>   - group pairings; only created when the
#                           linkage is not a self-linkage
#
# Also sets @group_value_fields, @groups_column_names,
# @groups_table_name, @join_table_name and (when applicable)
# @secondary_groups_table_name.
#
# Raises RuntimeError when the two fields of a pair have different
# local column types, because a single pair_N column has to hold the
# values of both.
def create_tables
  groups_columns = [
    {:name => :id, :type => Integer, :primary_key => true}
  ]
  @group_value_fields = []
  @field_pairs.each_with_index do |(field_1, field_2), i|
    type_1 = field_1.local_column_options[:type]
    type_2 = field_2.local_column_options[:type]
    if type_1 != type_2
      # FIXME: mismatched types are not supported yet; at least say
      # which fields are responsible instead of failing anonymously.
      raise "Cannot compare fields '#{field_1.name_sym}' and '#{field_2.name_sym}': " \
            "column types differ (#{type_1.inspect} vs #{type_2.inspect})"
    end
    @group_value_fields.push([field_1.name_sym, field_2.name_sym])
    groups_columns.push({:name => :"pair_#{i}", :type => type_1})
  end

  # Captured BEFORE the count columns are appended: only id and the
  # pair_N columns are listed here.
  @groups_column_names = groups_columns.collect { |c| c[:name] }
  @groups_table_name = :"groups_#{@run_number}"

  # Extra columns on the groups table for summary stats.
  @resources.length.times do |i|
    groups_columns << {:name => :"resource_#{i+1}_count", :type => Integer}
  end

  # When the resources disagree on primary-key type, record ids are
  # stored as strings.
  key_types = @resources.collect { |r| r.primary_key_type }.uniq
  record_id_type = key_types.length == 1 ? key_types[0] : String
  @join_table_name = :"groups_records_#{@run_number}"

  @parent.local_database do |scenario_db|
    scenario_db.create_table!(@groups_table_name) do
      columns.push(*groups_columns)
    end
    scenario_db.create_table!(@join_table_name) do
      column :record_id, record_id_type
      Integer :which
      Integer :group_id, :index => true
      index [:group_id, :which, :record_id] # speedy filtering when showing results
    end
    if @type != 'self-linkage'
      # Phase two needs a table to record which groups pair up.
      @secondary_groups_table_name = :"groups_groups_#{@run_number}"
      scenario_db.create_table!(@secondary_groups_table_name) do
        Integer :group_1_id
        Integer :group_2_id
      end
    end
  end
end
|
|
167
|
+
|
|
168
|
+
# Starts (and returns) a thread that groups the records of one resource.
#
# The dataset is narrowed/ordered by every comparison of the matcher,
# then split into segments of LIMIT rows. Each segment is scanned by its
# own thread (at most ten in flight) comparing every row with the one
# before it; consecutive matching rows share a group. Once all segments
# are done, the last row of each segment is compared with the first row
# of the next one so that groups spanning a segment boundary get merged.
#
# dataset     - dataset of the resource's records
# primary_key - Symbol name of the record id column
# which       - nil for a self-linkage, otherwise the index (0/1) of the
#               resource; when set, single-record groups are saved too
#
# Returns the coordinating Thread (abort_on_exception enabled).
#
# NOTE(review): @join_buffer / @groups_buffer are shared by all segment
# threads; this relies on ImportBuffer#add being safe to call
# concurrently - confirm.
def phase_one_thread(dataset, primary_key, which)
  # Distinct local names on purpose: the blocks below are closures, so
  # re-using one `thread` variable inside them would overwrite the
  # variable this method returns.
  coordinator = Thread.new do
    # Apply filters and what not from comparisons
    dataset = dataset.select(primary_key)
    @matcher.comparisons.each do |comparison|
      dataset = comparison.apply(dataset, which)
    end
    dataset = dataset.order_more(primary_key)

    # Do the work
    local_tw = ThreadsWait.new
    segment_threads = []
    count = dataset.count
    segments = count / LIMIT
    segments += 1 if count % LIMIT > 0

    segments.times do |segment_num|
      ds = dataset.limit(LIMIT, LIMIT * segment_num)
      segment_thread = Thread.new do
        prev_row = nil
        group_id = nil

        ds.each_with_index do |row, row_num|
          if row_num > 0
            result = compare_rows(prev_row, row, which)
            if (result || which) && group_id.nil?
              # If `which` is not nil, that means we're in the first
              # stage of a dual or cross linkage. So, we should save
              # groups that only have 1 record in them.
              group_id = create_group(prev_row, which)
              @join_buffer.add([prev_row[primary_key], which, group_id])
            end
            # At this point group_id is the group prev_row belongs to
            # (nil when it has none); remember it before it is reset.
            prev_group_id = group_id

            if result
              @join_buffer.add([row[primary_key], which, group_id])
            else
              group_id = nil
            end

            # Remember the first record of this segment (and its group)
            # so it can be checked against the last record of the
            # previous segment without another database query.
            if segment_num > 0 && row_num == 1
              Thread.current[:head] = { :row => prev_row, :group_id => prev_group_id }
            end
          end
          prev_row = row
        end

        if which && group_id.nil?
          # See above comment about `which`
          group_id = create_group(prev_row, which)
          @join_buffer.add([prev_row[primary_key], which, group_id])
        end

        # Remember the last record of this segment in order to check it
        # against the first record of the next segment.
        if segment_num < (segments - 1)
          Thread.current[:tail] = { :row => prev_row, :group_id => group_id }
        end
      end
      segment_thread.abort_on_exception = true
      segment_threads << segment_thread
      local_tw.join_nowait(segment_thread)
      local_tw.next_wait if local_tw.threads.length == 10
    end
    local_tw.all_waits
    @groups_buffer.flush
    @join_buffer.flush

    # Compare heads and tails from neighbouring segments
    if segments > 1
      tail = nil
      segment_threads.each do |finished|
        head = finished[:head]
        # head is nil for a segment with fewer than two rows. Compare the
        # ROWS stored in the head/tail records, not the records
        # themselves.
        if tail && head && compare_rows(tail[:row], head[:row], which)
          # Only merge when both rows actually belong to a group;
          # otherwise the update below would write a NULL group id.
          # NOTE(review): in a self-linkage a boundary pair where either
          # row is ungrouped is therefore still not linked - confirm
          # whether that case needs handling.
          if tail[:group_id] && head[:group_id]
            @join_dataset.filter({
              :group_id => head[:group_id]
            }).update(:group_id => tail[:group_id])
            @groups_dataset.filter({
              :id => head[:group_id]
            }).delete
          end
        end
        tail = finished[:tail]
      end
    end
  end
  coordinator.abort_on_exception = true
  coordinator
end
|
|
260
|
+
|
|
261
|
+
# Second pass for non-self linkages: finds pairs of groups whose compared
# values are identical and records them through @join_buffer as
# [first_group_key, second_group_key] rows.
#
# The dataset is ordered by every column in @pairs and then by
# primary_key, split into LIMIT-row segments, and each segment is scanned
# by its own thread (at most ten in flight). A row that matched its
# predecessor is not compared with its successor, so matches stay
# one-to-one. Afterwards unmatched rows at segment boundaries are
# compared with each other.
#
# dataset     - dataset of groups to pair up
# primary_key - Symbol name of the group id column
#
# The buffer is flushed after the segment scan; rows added by the
# boundary check are left for the caller to flush.
def phase_two(dataset, primary_key)
  # Add sorting only
  sorted = @pairs.inject(dataset) { |ds, fields| ds.order_more(*fields.uniq) }
  sorted = sorted.order_more(primary_key)

  waiter = ThreadsWait.new
  workers = []
  total = sorted.count
  segments = (total + LIMIT - 1) / LIMIT   # ceiling division

  segments.times do |segment_num|
    slice = sorted.limit(LIMIT, LIMIT * segment_num)
    worker = Thread.new do
      last_row = nil
      last_match = nil
      skip_next = false

      slice.each_with_index do |row, row_num|
        match = nil
        if skip_next
          # The previous row was already paired; don't compare it again.
          skip_next = false
        elsif row_num > 0
          match = compare_rows(last_row, row)
          if match
            # There should only be one-to-one matches here.
            @join_buffer.add([last_row[primary_key], row[primary_key]])
            skip_next = true
          elsif segment_num > 0 && row_num == 1
            # First row of this segment is still unpaired: keep it for
            # the boundary check against the previous segment.
            Thread.current[:head] = { :row => last_row }
          end
        end
        last_row = row
        last_match = match
      end

      # Last row of this segment is unpaired and a next segment exists.
      if segment_num < (segments - 1) && !last_match
        Thread.current[:tail] = { :row => last_row }
      end
    end
    worker.abort_on_exception = true
    workers << worker
    waiter.join_nowait(worker)
    waiter.next_wait if waiter.threads.length == 10
  end
  waiter.all_waits
  @join_buffer.flush

  # Compare heads and tails from neighbouring segments
  if workers.length > 1
    tail = nil
    workers.each do |finished|
      if tail
        head = finished[:head]
        if head && compare_rows(tail[:row], head[:row])
          @join_buffer.add([tail[:row][primary_key], head[:row][primary_key]])
        end
      end
      tail = finished[:tail]
    end
  end
end
|
|
325
|
+
|
|
326
|
+
# Compares two rows on every field pair in @pairs.
#
# row_1, row_2 - objects indexable by column name (e.g. Hashes)
# which        - when nil, the first name of each pair is read from
#                row_1 and the second from row_2; when 0 or 1, that
#                same name is read from both rows
#
# Returns an Array with the shared value of every pair when all pairs
# are equal (an empty Array if @pairs is empty), or nil as soon as one
# pair differs.
def compare_rows(row_1, row_2, which = nil)
  left_index  = which || 0
  right_index = which || 1

  shared = []
  @pairs.each do |fields|
    left  = row_1[fields[left_index]]
    right = row_2[fields[right_index]]
    return nil unless left == right
    shared << left
  end
  shared
end
|
|
340
|
+
|
|
341
|
+
# Queues a new group row and returns its id.
#
# The row consists of a freshly issued group id followed by the value of
# each compared field, read from `row`.
#
# row   - the record the group is created for
# which - nil/0 reads the first field name of every pair in
#         @group_value_fields, 1 reads the second
def create_group(row, which)
  new_id = get_next_group_id
  side = which || 0
  values = @group_value_fields.map { |fields| row[fields[side]] }
  @groups_buffer.add([new_id] + values)
  new_id
end
|
|
350
|
+
|
|
351
|
+
# Issues the next group id: increments @group_number while holding
# @mutex and returns the new value.
def get_next_group_id
  @mutex.synchronize do
    @group_number += 1
  end
end
|
|
354
|
+
end
|
|
355
|
+
end
|
|
356
|
+
end
|
|
357
|
+
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
module Coupler
|
|
2
|
+
module Models
|
|
3
|
+
class Scenario < Sequel::Model
|
|
4
|
+
# Error for running a scenario that has no matcher. Derives from
# StandardError (not Exception) so that a plain `rescue` handles it.
class NoMatcherError < StandardError; end
|
|
5
|
+
# Error for running a scenario whose resources are out of date. Derives
# from StandardError (not Exception) so that a plain `rescue` handles it.
class ResourcesOutOfDateError < StandardError; end
|
|
6
|
+
|
|
7
|
+
include CommonModel
|
|
8
|
+
include Jobify
|
|
9
|
+
|
|
10
|
+
attr_writer :resource_ids
|
|
11
|
+
many_to_one :project
|
|
12
|
+
many_to_one :resource_1, :class => "Coupler::Models::Resource"
|
|
13
|
+
many_to_one :resource_2, :class => "Coupler::Models::Resource"
|
|
14
|
+
one_to_one :matcher
|
|
15
|
+
one_to_many :results
|
|
16
|
+
|
|
17
|
+
# Returns a Hash mapping project_id to the :count value of its group,
# built from the model's dataset grouped by :project_id.
def self.count_by_project
  grouped = dataset.naked.group_and_count(:project_id)
  grouped.to_hash(:project_id, :count)
end
|
|
20
|
+
|
|
21
|
+
# Reports whether the scenario can be run.
#
# Returns "no_matcher" when no matcher is set,
# "resources_out_of_date" when any resource reports the status
# "out_of_date", and "ok" otherwise.
def status
  return "no_matcher" if matcher.nil?

  stale = resources.any? { |r| r.status == "out_of_date" }
  stale ? "resources_out_of_date" : "ok"
end
|
|
30
|
+
|
|
31
|
+
# Returns the scenario's resources as an Array: empty when resource_1 is
# missing, otherwise resource_1 followed by resource_2 if that is set.
def resources
  return [] unless resource_1
  return [resource_1] unless resource_2

  [resource_1, resource_2]
end
|
|
38
|
+
|
|
39
|
+
# Connects to this scenario's own database via Sequel.connect, passing
# the given block through.
#
# The connection logs to Coupler::Logger and is configured with
# :max_connections => 50 and :pool_timeout => 60.
def local_database(&block)
  target = local_connection_string
  options = {
    :loggers => [Coupler::Logger.instance],
    :max_connections => 50,
    :pool_timeout => 60
  }
  Sequel.connect(target, options, &block)
end
|
|
46
|
+
|
|
47
|
+
# Runs the scenario and records the result.
#
# Raises NoMatcherError when status is 'no_matcher' and
# ResourcesOutOfDateError when it is 'resources_out_of_date'.
# Otherwise a Runner is executed (the progress block is handed to it),
# run_count is incremented and last_run_at set, and a Result for the new
# run number is saved.
def run!(&progress)
  current = status
  raise NoMatcherError if 'no_matcher' == current
  raise ResourcesOutOfDateError if 'resources_out_of_date' == current

  Runner.new(self, &progress).run!

  update(:run_count => run_count + 1, :last_run_at => Time.now)
  # run_count now holds the number of the run that just finished.
  Result.new(:scenario => self, :run_number => run_count).save
end
|
|
60
|
+
|
|
61
|
+
# Derives linkage_type from the resources and the matcher:
#
#   no resource_1                      -> "N/A"
#   resource_1 and resource_2          -> "dual-linkage"
#   one resource, matcher cross-matches -> "cross-linkage"
#   one resource otherwise             -> "self-linkage"
def set_linkage_type
  self.linkage_type =
    if !resource_1
      "N/A"
    elsif resource_2
      "dual-linkage"
    elsif matcher && matcher.cross_match?
      "cross-linkage"
    else
      "self-linkage"
    end
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Connection string of the database named "scenario_<id>", as produced
# by Base.connection_string.
def local_connection_string
  database_name = "scenario_#{id}"
  Base.connection_string(database_name)
end
|
|
80
|
+
|
|
81
|
+
# Validation hook: when resource ids were supplied through the
# resource_ids writer (as an Array), looks up at most the first two of
# them among the project's resources and stores the ids of the records
# found in resource_1_id / resource_2_id (nil where none was found).
#
# NOTE(review): the records are taken in the order the query returns
# them, which is presumably not guaranteed to be the order of
# @resource_ids - confirm whether that matters.
def before_validation
  super
  return unless @resource_ids.is_a?(Array)

  requested = @resource_ids[0..1].compact
  found = project.resources_dataset.filter(:id => requested).all
  first, second = found[0], found[1]
  self.resource_1_id = first.nil? ? nil : first.id
  self.resource_2_id = second.nil? ? nil : second.id
end
|
|
89
|
+
|
|
90
|
+
# Validation rules: name must be present, the [name, project_id]
# combination must be unique, and resource_1_id must be set.
def validate
  super
  validates_presence :name
  validates_unique [:name, :project_id]
  errors.add(:base, "At least one resource is required") if resource_1_id.nil?
end
|
|
98
|
+
|
|
99
|
+
# Creation hook: fills in a missing slug (the lower-cased name with each
# run of whitespace replaced by "_") and sets the linkage type.
def before_create
  super
  self.slug = name.downcase.gsub(/\s+/, "_") unless slug
  set_linkage_type
end
|
|
104
|
+
|
|
105
|
+
# There is currently no plan to let a user change resources
|
|
106
|
+
# after a scenario has been created.
|
|
107
|
+
#def before_update
|
|
108
|
+
#super
|
|
109
|
+
#set_linkage_type
|
|
110
|
+
#end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
require File.join(File.dirname(__FILE__), 'scenario', 'runner')
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
module Coupler
|
|
2
|
+
module Models
|
|
3
|
+
class Transformation < Sequel::Model
|
|
4
|
+
include CommonModel
|
|
5
|
+
many_to_one :resource
|
|
6
|
+
many_to_one :source_field, :class => Field
|
|
7
|
+
many_to_one :result_field, :class => Field
|
|
8
|
+
many_to_one :transformer
|
|
9
|
+
|
|
10
|
+
plugin :nested_attributes
|
|
11
|
+
nested_attributes :result_field
|
|
12
|
+
nested_attributes :transformer, :destroy => false
|
|
13
|
+
|
|
14
|
+
# Applies the transformer to `data`, telling it to read from the source
# field's column (:in) and write to the result field's column (:out).
# Returns whatever the transformer returns.
def transform(data)
  engine = transformer
  columns = {
    :in => source_field.name.to_sym,
    :out => result_field.name.to_sym
  }
  engine.transform(data, columns)
end
|
|
20
|
+
|
|
21
|
+
# Asks the transformer for the changes it makes to the source field and
# returns its answer unchanged.
def field_changes
  engine = transformer
  engine.field_changes(source_field)
end
|
|
24
|
+
|
|
25
|
+
# NOTE: The fact that the aliased name doesn't have an = at the end is
|
|
26
|
+
# important. Ruby methods with names that have = at the end always
|
|
27
|
+
# return the RHS value, regardless of what the method actually returns.
|
|
28
|
+
# The only way to grab the associated object that gets created from the
|
|
29
|
+
# nested attributes methods is by fetching the return value.
|
|
30
|
+
#
|
|
31
|
+
alias :original_result_field_attributes :result_field_attributes=
|
|
32
|
+
# Nested-attributes setter for the result field. Forces
# :is_generated => true onto the given attributes, delegates to the
# original setter (reachable under its non-"=" alias) and keeps that
# call's return value in @staged_result_field.
def result_field_attributes=(h)
  attributes = h.merge(:is_generated => true)
  @staged_result_field = original_result_field_attributes(attributes)
end
|
|
37
|
+
|
|
38
|
+
# Whether this transformation may be destroyed.
#
# It must be the last transformation OF ITS RESOURCE (positions are
# numbered per resource, so the highest position is looked up among the
# transformations sharing this resource_id), and its result field must
# be absent, not generated, or not used by any scenario.
#
# Returns true or false.
def deletable?
  last_position = self.class.filter(:resource_id => resource_id).max(:position)
  return false unless position == last_position

  result_field.nil? || !result_field.is_generated || result_field.scenarios_dataset.count == 0
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
# Validation hook.
#
# * Without an explicit or staged result field, the transformation
#   writes back into its source field.
# * With a staged (nested-attributes) result field, that field is
#   completed with the first set of changes the transformer reports for
#   the source field - or, if that set is empty, with the source field's
#   :type and :db_type - plus this transformation's resource_id.
def before_validation
  super
  self.result_field_id = source_field_id if source_field_id && !result_field_id && !@staged_result_field

  if @staged_result_field && transformer && source_field && resource_id
    changes = transformer.field_changes(source_field).values[0]
    if changes.empty?
      changes.update(:type => source_field[:type], :db_type => source_field[:db_type])
    end
    changes[:resource_id] = resource_id
    @staged_result_field.set(changes)
  end
end
|
|
62
|
+
|
|
63
|
+
# Validation rules:
#
# * resource_id and source_field_id must be present, and a transformer
#   must be set.
# * Only if no error has been recorded so far:
#   - the source field must be one of the resource's fields;
#   - when the transformer declares allowed_types as an Array, the
#     source field's final_type must be among them;
#   - unless a result field is staged, result_field_id must point to one
#     of the resource's fields.
def validate
  super
  validates_presence [:resource_id, :source_field_id]
  errors.add(:transformer_id, "is not present") if transformer.nil?
  return unless errors.empty?

  source = resource.fields_dataset[:id => source_field_id]
  if source.nil?
    errors.add(:source_field_id, "is invalid")
  else
    allowed = transformer.allowed_types
    if allowed.is_a?(Array) && !allowed.include?(source.final_type)
      errors.add(:base, "#{transformer.name} cannot transform type '#{source.final_type}'")
    end
  end

  return if @staged_result_field

  target = result_field_id ? resource.fields_dataset[:id => result_field_id] : nil
  errors.add(:result_field_id, "is invalid") if target.nil?
end
|
|
87
|
+
|
|
88
|
+
# Creation hook: unless a position is already set, puts the
# transformation after the existing ones of its resource (their count
# plus one).
def before_create
  super
  unless position
    siblings = self.class.filter(:resource_id => resource_id)
    self.position = siblings.count + 1
  end
end
|
|
92
|
+
|
|
93
|
+
# Save hook: has the owning resource refresh its fields.
def after_save
  super
  owner = resource
  owner.refresh_fields!
end
|
|
97
|
+
|
|
98
|
+
# Destroy hook: returns the value of deletable?, i.e. a false value
# unless this is a transformation that may be removed.
#
# TODO: deleting from the middle of a transformation stack needs more
# complex logic and is not supported yet.
def before_destroy
  super
  allowed = deletable?
  allowed
end
|
|
107
|
+
|
|
108
|
+
# Destroy hook: removes the result field when it was generated and no
# transformation refers to it any more, then has the resource (if any)
# refresh its fields.
def after_destroy
  super
  if result_field && result_field.is_generated
    remaining = self.class.filter(:result_field_id => result_field.id).count
    result_field.destroy if remaining == 0
  end
  resource.refresh_fields! if resource
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
module Coupler
  module Models
    class Transformer
      # Evaluates a snippet of transformer code against one input value.
      #
      # Every public/protected instance method except the `__*` ones and
      # instance_eval is undefined, so the only things the code can call
      # on the runner itself are `value` and `run`; any other call ends
      # up in method_missing and raises NoMethodError.
      #
      # SECURITY NOTE(review): this is NOT a sandbox. The code is run
      # with instance_eval, so private Kernel methods and all constants
      # stay reachable from it. If `code` can come from untrusted users
      # this needs a real isolation mechanism - confirm where it
      # originates.
      class Runner
        instance_methods.each do |m|
          undef_method m unless m =~ /^__|^instance_eval$/
        end

        # code  - String of Ruby source to evaluate
        # input - the value exposed to the code as `value`
        def initialize(code, input)
          @input = input
          @code = code
        end

        # Evaluates the code in the context of this runner and returns
        # the value of its last expression. Exceptions raised by the
        # code propagate to the caller.
        def run
          instance_eval(@code, __FILE__, __LINE__)
        end

        # The input value, for use by the evaluated code.
        def value
          @input
        end

        # Rejects every unknown call, whatever arguments or block it was
        # given, with a NoMethodError that names the method.
        def method_missing(name, *args, &block)
          raise NoMethodError.new("undefined method `#{name}' for transformer code", name)
        end
      end
    end
  end
end
|