data_task 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ require_relative '../../db'
2
+
3
+ module Rake
4
+ module DataTask
5
+
6
+ class Db
7
# Transaction helpers built on the literal SQL statements "begin;",
# "commit;" and "rollback;". The including class must provide an
# `execute` method that runs a single SQL string.
module StandardTransactions

  # Run the given block inside a transaction.
  #
  # If the block raises (or exits non-locally), the transaction is rolled
  # back before the exception propagates, so a failure never leaves the
  # connection inside an open transaction.
  #
  # @param do_commit [Boolean] commit when truthy, roll back otherwise
  # @yield the work to perform between "begin;" and the closing statement
  # @return [Object] whatever `execute` returns for the closing statement
  def with_transaction(do_commit, &block)
    execute "begin;"
    finished = false
    begin
      yield
      finished = true
    ensure
      # Only reached with finished == false when the block did not complete;
      # partial work must never be committed.
      execute "rollback;" unless finished
    end
    execute(do_commit ? "commit;" : "rollback;")
  end

  # Run the block in a transaction that is committed on success.
  def with_transaction_commit(&block)
    with_transaction(true, &block)
  end

  # Run the block in a transaction that is always rolled back.
  def with_transaction_rollback(&block)
    with_transaction(false, &block)
  end

end
25
+ end
26
+
27
+ end
28
+ end
@@ -0,0 +1,34 @@
1
+ require_relative './db'
2
+
3
module Rake
  module DataTask

    # A named piece of data (for example a table) that lives behind a
    # database adapter, roughly analogous to the File class. Existence and
    # modification-time queries are delegated to the adapter.
    class Data

      attr_accessor :name, :adapter

      # @param data_name [Object] name of the data, passed to the adapter as-is
      # @param adapter [Object] object answering data_exists? and data_mtime
      def initialize(data_name, adapter)
        @adapter = adapter
        @name = data_name
      end

      # Ask the adapter whether this data exists.
      #
      # @param options [Hash] forwarded untouched to the adapter
      # @return whatever the adapter's data_exists? returns
      def exists?(options = {})
        @adapter.data_exists?(@name, options)
      end

      alias exist? exists?

      # @return whatever the adapter's data_mtime returns for this name
      def mtime
        @adapter.data_mtime(@name)
      end

      # @return the name this object was created with
      def to_s
        @name
      end

    end

  end
end
@@ -0,0 +1,57 @@
1
+ require 'yaml'
2
+ require 'logger'
3
+ require_relative './sql'
4
+ require_relative './util'
5
+
6
+ module Rake
7
+ module DataTask
8
+
9
class Db
  # This is the base class for SQL-compliant relational databases. It contains utility methods
  # that probably don't vary across databases, and it shouldn't be instantiated.

  LOG = Logger.new(STDOUT)
  LOG.level = Logger::WARN

  # Name of the table in which operations on tracked relations are recorded.
  TABLE_TRACKER_NAME = 'last_operations'

  # Enumerates the case-sensitive, DBMS-specific values that we store in tracking tables.
  # A child class may define its own TABLE_TRACKER_COLUMNS constant for a specific database;
  # the accessors below look the constant up through the receiver's class so that such an
  # override is honored.
  TABLE_TRACKER_COLUMNS = {
    :relation_name => { :data_type => :text },
    :relation_type => {
      :data_type => :text,
      :values => {
        :table => 'table',
        :view => 'view'
      }
    },
    :operation => {
      :data_type => :text,
      :values => {
        :create => 'create',
        :insert => 'insert',
        :update => 'update',
        :truncate => 'truncate',
        :delete => 'delete'
      }
    },
    :time => { :data_type => :timestamp }
  }

  # @return [Hash] the tracking-table column definitions for this adapter's class
  def table_tracker_columns
    # A bare TABLE_TRACKER_COLUMNS would be resolved lexically and always
    # yield Db's constant, silently ignoring a subclass override.
    self.class::TABLE_TRACKER_COLUMNS
  end

  # @return [Hash] symbolic operation name => value stored in the tracking table
  def operation_values
    table_tracker_columns[:operation][:values]
  end

  # @return [Hash] symbolic relation type => value stored in the tracking table
  def relation_type_values
    table_tracker_columns[:relation_type][:values]
  end

end
55
+
56
+ end
57
+ end
@@ -0,0 +1,49 @@
1
+ require 'time'
2
+
3
+ module Rake
4
+ module DataTask
5
+
6
# Helpers for converting single-value query results into Ruby objects.
class Sql

  class << self

    # Validate that a query result holds at most one value and hand it to
    # the supplied conversion block.
    #
    # @param r [Array] an array (table) of arrays (rows), usually resulting from a database query
    # @yield [r] conversion logic; it receives the whole result table
    # @raise [TypeError] if r contains more than one row or column
    # @return [Object, nil] the block's return value, or nil when r is nil,
    #   empty, [[]] or [[nil]]
    def parse_single_value(r, &type_logic)
      no_value = r.nil? || r.empty? || r == [[]] || r == [[nil]]
      return nil if no_value

      raise TypeError, 'Query must result in a single row' if r.length > 1
      raise TypeError, 'Query must result in a single column' if r.first.length > 1

      yield(r)
    end

    # Get a single integer via SQL.
    #
    # @param r [Array] an array containing a single result from a query
    # @return [Integer, nil] the single result converted with Kernel#Integer
    def get_single_int(r)
      parse_single_value(r) { Integer(r.first.first) }
    end

    # Get a single time via SQL.
    #
    # The value is parsed with Time.parse and then re-parsed from its
    # Time#to_s form, so the result is a DateTime without sub-second detail.
    #
    # @param r [Array] an array containing a single result from a query
    # @return [DateTime, nil] the single result converted to a DateTime
    def get_single_time(r)
      parse_single_value(r) do
        parsed = Time.parse(r.first.first)
        DateTime.parse(parsed.to_s)
      end
    end

  end

end
47
+
48
+ end
49
+ end
@@ -0,0 +1,35 @@
1
+ require 'data_task/adapters/postgres'
2
+ require 'data_task/adapters/sqlite'
3
+
4
+ # set up adapters to two databases
5
# Example pipeline: a file task feeds a PostgreSQL table, which feeds a derived
# PostgreSQL table, which is finally copied into SQLite.
# Adapter for the PostgreSQL database named 'example' on localhost:5432.
+ postgres = Rake::DataTask::Postgres.new(
6
+ 'host' => 'localhost', 'port' => 5432, 'database' => 'example', 'username' => 'postgres')
7
# Adapter for the SQLite database named 'example'.
+ sqlite = Rake::DataTask::Sqlite.new('database' => 'example')
8
+
9
+
10
# Plain Rake file task: writes the literal "v1" into raw.txt.
+ desc "Build a data file."
11
+ file 'raw.txt' do
12
+ File.open('raw.txt', 'w') { |file| file.write("v1") }
13
+ end
14
+
15
# Data task keyed by postgres['raw'] with the raw.txt file task as its prerequisite.
# The copy statement is given the absolute path of raw.txt under the current directory.
+ desc "Load a data file into PostgreSQL for analysis."
16
+ data postgres['raw'] => 'raw.txt' do
17
+ postgres.create_table "raw", nil, "(var1 text)"
18
+ postgres.execute "copy raw from '#{File.expand_path('raw.txt',Dir.pwd)}'"
19
+ end
20
+
21
# Data task that derives 'analyzed' from 'raw' via a select statement.
+ desc "Perform analysis in PostgreSQL."
22
+ data postgres['analyzed'] => postgres['raw'] do
23
+ # perform analysis ...
24
+ postgres.create_table "analyzed", "select * from raw", nil
25
+ end
26
+
27
# Cross-database data task: reads var1 from PostgreSQL and inserts the values into SQLite.
# NOTE(review): the values are spliced into the insert statement as quoted literals without
# escaping; presumably acceptable for this example data only - confirm before reusing.
+ desc "Archive analysis results in SQLite."
28
+ data sqlite['analyzed_archive'] => postgres['analyzed'] do
29
+ sqlite.create_table "analyzed_archive", nil, "(var1 text)"
30
+ r = postgres.execute "select var1 from analyzed"
31
+ sqlite.execute <<-EOSQL
32
+ insert into analyzed_archive values
33
+ ('#{ r.flatten.join("'),('") }')
34
+ EOSQL
35
+ end
@@ -0,0 +1,6 @@
1
class Array
  # Render the elements as a comma-separated list of single-quoted SQL
  # string literals, e.g. ["a", "b"] => "'a','b'".
  #
  # Nested arrays are flattened (as Array#join would do) and each element
  # is converted with to_s. A single quote inside an element is doubled,
  # the standard SQL escape, so a value such as "O'Brien" no longer
  # terminates its literal early.
  #
  # @return [String] the quoted list; "''" for an empty array
  def to_quoted_s
    escaped = flatten.map { |element| element.to_s.gsub("'", "''") }
    "'#{escaped.join("','")}'"
  end
end
@@ -0,0 +1,5 @@
1
module Rake
  module DataTask
    # Version string of the data_task gem.
    VERSION = '0.0.2'
  end
end
data/lib/data_task.rb ADDED
@@ -0,0 +1,76 @@
1
+ require 'rake'
2
+ require 'data_task/version'
3
+ require 'data_task/data'
4
+ require 'data_task/adapters/postgres'
5
+ require 'data_task/adapters/sqlite'
6
+
7
+ module Rake
8
+ # #########################################################################
9
+ # A DataTask is a task that includes time based dependencies. If any of a
10
+ # DataTask's prerequisites has a timestamp that is later than the data
11
+ # represented by this task, then the data must be rebuilt (using the
12
+ # supplied actions).
13
+ module DataTask
14
+
15
+ class DataTask < ::Rake::Task
16
+
17
+ # Instantiate a new DataTask.
18
+ #
19
+ # @param [Data] data the Data object that keeps track of existence and modification
20
+ # @param [Rake::Application] app required by the parent class's constructor
21
+ def initialize(data, app)
22
+ super
23
+ @data = data
24
+ end
25
+
26
+ # Is this table task needed? Yes if it doesn't exist, or if its time stamp
27
+ # is out of date.
28
+ def needed?
29
+ !@data.exist? || out_of_date?(timestamp)
30
+ end
31
+
32
+ # Time stamp for data task.
33
+ def timestamp
34
+ if @data.exist?
35
+ mtime = @data.mtime.to_time
36
+ raise "Table #{name} exists but modified time is unavailable." if mtime.nil?
37
+ mtime
38
+ else
39
+ Rake::EARLY
40
+ end
41
+ end
42
+
43
+ private
44
+
45
+ # Are there any prerequisites with a later time than the given time stamp?
46
+ def out_of_date?(stamp)
47
+ @prerequisites.any? do |n|
48
+ prereq_time = application[n, @scope].timestamp
49
+ return false if prereq_time == Rake::EARLY
50
+
51
+ prereq_time > stamp
52
+ end
53
+ end
54
+
55
+ # ----------------------------------------------------------------
56
+ # Task class methods.
57
+ #
58
+ class << self
59
+ # Apply the scope to the task name according to the rules for this kind
60
+ # of task. Table based tasks ignore the scope when creating the name.
61
+ def scope_name(scope, task_name)
62
+ task_name
63
+ end
64
+ end
65
+
66
+ end
67
+
68
+ end
69
+ end
70
+
71
# Declare a data task, in the same way `file` declares a file task.
#
# The task name in *args is a Data object returned by an adapter. Rake keys
# the task by Data#to_s in its task table; all task recording and lookup in
# Rake already goes through to_s in order to accommodate tasks named by symbols.
def data(*args, &block)
  task_class = Rake::DataTask::DataTask
  task_class.define_task(*args, &block)
end
@@ -0,0 +1,10 @@
1
+ sqlite_test:
2
+ adapter: sqlite
3
+ database: ci_test
4
+
5
+ postgres_test:
6
+ adapter: postgres
7
+ database: ci_test
8
+ host: localhost
9
+ port: 5432
10
+ username: postgres
data/test/data_spec.rb ADDED
@@ -0,0 +1,81 @@
1
+ require_relative './helper.rb'
2
+
3
+ module Rake
4
+ module DataTask
5
+
6
# Spec for Data#mtime: after each kind of write issued through the adapter, the
# modification time reported for the data must move forward.
+ describe Data do
7
+
8
+ test_data_name = "test"
9
+
10
# Returns true when data.mtime is greater after calling `operation` than before.
# The one-second sleep separates the two readings; presumably the tracked times
# have (at best) one-second resolution - TODO confirm against the adapters.
+ def mtime_updated? data, operation
11
+ original_mtime = data.mtime
12
+ sleep(1)
13
+ operation.call
14
+ data.mtime > original_mtime
15
+ end
16
+
17
# Every example runs inside a transaction that is rolled back afterwards.
+ around do |test|
18
+ @adapter = get_adapter
19
+ @adapter.with_transaction_rollback do
20
+ test.call
21
+ end
22
+ end
23
+
24
+ it "has a modified time after creation" do
25
+ @adapter.with_tracking do
26
+ @adapter.create_data test_data_name, nil, "(var1 integer)"
27
+ t = Data.new(test_data_name, @adapter)
28
+ t.mtime.to_time.must_be :>, Time.new(0)
29
+ end
30
+ end
31
+
32
+ it "has an updated modified time after insert" do
33
+ @adapter.with_tracking do
34
+ @adapter.create_data test_data_name, nil, "(var1 integer)"
35
+ t = Data.new(test_data_name, @adapter)
36
+ operation = lambda do
37
+ @adapter.execute "insert into #{test_data_name} values (1)"
38
+ end
39
+ mtime_updated?(t, operation).must_equal true
40
+ end
41
+ end
42
+
43
+ it "has an updated modified time after update" do
44
+ @adapter.with_tracking do
45
+ @adapter.create_data test_data_name, nil, "(var1 integer, var2 integer)"
46
+ t = Data.new(test_data_name, @adapter)
47
+ @adapter.execute "insert into #{test_data_name} values (1, 1)"
48
+ operation = lambda do
49
+ @adapter.execute "update #{test_data_name} set var2 = 2 where var1 = 1"
50
+ end
51
+ mtime_updated?(t, operation).must_equal true
52
+ end
53
+ end
54
+
55
+ it "has an updated modified time after delete" do
56
+ @adapter.with_tracking do
57
+ @adapter.create_data test_data_name, nil, "(var1 integer)"
58
+ t = Data.new(test_data_name, @adapter)
59
+ @adapter.execute "insert into #{test_data_name} values (1)"
60
+ operation = lambda do
61
+ @adapter.execute "delete from #{test_data_name}"
62
+ end
63
+ mtime_updated?(t, operation).must_equal true
64
+ end
65
+ end
66
+
67
+ it "has an updated modified time after truncate" do
68
+ @adapter.with_tracking do
69
+ @adapter.create_data test_data_name, nil, "(var1 integer)"
70
+ t = Data.new(test_data_name, @adapter)
71
+ @adapter.execute "insert into #{test_data_name} values (1)"
72
+ operation = lambda do
73
+ @adapter.truncate_data test_data_name
74
+ end
75
+ mtime_updated?(t, operation).must_equal true
76
+ end
77
+ end
78
+
79
+ end
80
+ end
81
+ end
data/test/db_spec.rb ADDED
@@ -0,0 +1,202 @@
1
+ require_relative './helper.rb'
2
+
3
+ module Rake
4
+ module DataTask
5
+
6
# Spec for the adapter returned by get_adapter: table/view existence checks,
# create/drop helpers, and the rows written to the tracking table
# (Db::TABLE_TRACKER_NAME) for each kind of operation.
+ describe Db do
7
+
8
# Every example runs inside a transaction that is rolled back afterwards.
+ around do |test|
9
+ @adapter = get_adapter
10
+ @adapter.with_transaction_rollback do
11
+ test.call
12
+ end
13
+ end
14
+
15
+ test_table = "test_table"
16
+ test_view = "test_view"
17
+
18
# --- existence checks, using raw SQL rather than the adapter's create/drop helpers ---
+ it "finds a table when it exists" do
19
+ if !@adapter.table_exists?(test_table)
20
+ @adapter.execute <<-EOSQL
21
+ create table #{test_table} (var1 text)
22
+ EOSQL
23
+ end
24
+ @adapter.table_exists?(test_table).must_equal true
25
+ end
26
+
27
+ it "does not find a table when it does not exist" do
28
+ if @adapter.table_exists?(test_table)
29
+ @adapter.execute <<-EOSQL
30
+ drop table #{test_table}
31
+ EOSQL
32
+ end
33
+ @adapter.table_exists?(test_table).must_equal false
34
+ end
35
+
36
# --- create/drop helpers, run between set_up_tracking and tear_down_tracking ---
+ it "creates a table when called to" do
37
+ @adapter.with_tracking do
38
+
39
+ @adapter.create_table test_table, nil, '(var1 text)'
40
+ @adapter.table_exists?(test_table).must_equal true
41
+
42
+ end
43
+ end
44
+
45
+ it "drops a table when called to" do
46
+ @adapter.with_tracking do
47
+
48
+ @adapter.create_table test_table, nil, '(var1 text)'
49
+ @adapter.drop_table test_table
50
+ @adapter.table_exists?(test_table).must_equal false
51
+
52
+ end
53
+ end
54
+
55
+ it "creates a view when called to" do
56
+ @adapter.with_tracking do
57
+
58
+ @adapter.create_table test_table, nil, '(var1 text)'
59
+ @adapter.create_view test_view, "select * from #{test_table}"
60
+ @adapter.view_exists?(test_view).must_equal true
61
+
62
+ end
63
+ end
64
+
65
+ it "drops a view when called to" do
66
+ @adapter.with_tracking do
67
+
68
+ @adapter.create_table test_table, nil, '(var1 text)'
69
+ @adapter.create_view test_view, "select * from #{test_table}"
70
+ @adapter.drop_view test_view
71
+ @adapter.view_exists?(test_view).must_equal false
72
+
73
+ end
74
+ end
75
+
76
+ it "drops a view when the underlying table is dropped" do
77
+ @adapter.with_tracking do
78
+
79
+ @adapter.create_table test_table, nil, '(var1 text)'
80
+ @adapter.create_view test_view, "select * from #{test_table}"
81
+ @adapter.drop_table test_table
82
+ @adapter.view_exists?(test_view).must_equal false
83
+
84
+ end
85
+ end
86
+
87
# --- tracking-table contents: each query selects the literal 1 for a matching row, ---
# --- so Sql.get_single_int yields 1 when a row exists and nil when none does ---
+ it "updates the tracking table when it creates a table" do
88
+ @adapter.with_tracking do
89
+
90
+ @adapter.create_table test_table, nil, '(var1 text)'
91
+ tracked_create = Sql.get_single_int(
92
+ @adapter.execute <<-EOSQL
93
+ select 1 from #{Db::TABLE_TRACKER_NAME}
94
+ where
95
+ relation_name = '#{test_table}' and
96
+ relation_type = '#{@adapter.relation_type_values[:table]}' and
97
+ operation = '#{@adapter.operation_values[:create]}'
98
+ EOSQL
99
+ )
100
+ tracked_create.must_equal 1
101
+
102
+ end
103
+ end
104
+
105
+ it "updates the tracking table when it drops a table" do
106
+ @adapter.with_tracking do
107
+
108
+ @adapter.create_table test_table, nil, '(var1 text)'
109
+ @adapter.drop_table test_table
110
+ still_tracking_table = Sql.get_single_int(
111
+ @adapter.execute <<-EOSQL
112
+ select 1 from #{Db::TABLE_TRACKER_NAME}
113
+ where
114
+ relation_name = '#{test_table}' and
115
+ relation_type = '#{@adapter.relation_type_values[:table]}'
116
+ EOSQL
117
+ )
118
+ still_tracking_table.must_be_nil
119
+
120
+ end
121
+ end
122
+
123
+ it "updates the tracking table on insert to a tracked table" do
124
+ @adapter.with_tracking do
125
+
126
+ @adapter.create_table test_table, nil, '(var1 text)'
127
+ @adapter.execute <<-EOSQL
128
+ insert into #{test_table} values ('a')
129
+ EOSQL
130
+ tracked_insert = Sql.get_single_int(
131
+ @adapter.execute <<-EOSQL
132
+ select 1 from #{Db::TABLE_TRACKER_NAME}
133
+ where
134
+ relation_name = '#{test_table}' and
135
+ relation_type = '#{@adapter.relation_type_values[:table]}' and
136
+ operation = '#{@adapter.operation_values[:insert]}'
137
+ EOSQL
138
+ )
139
+ tracked_insert.must_equal 1
140
+
141
+ end
142
+ end
143
+
144
+ it "updates the tracking table on update on a tracked table" do
145
+ @adapter.with_tracking do
146
+
147
+ @adapter.create_table test_table, nil, '(var1 text, var2 text)'
148
+ @adapter.execute <<-EOSQL
149
+ insert into #{test_table} values ('a', 'a')
150
+ EOSQL
151
+ @adapter.execute <<-EOSQL
152
+ update #{test_table} set var2 = 'b' where var1 = 'a'
153
+ EOSQL
154
+
155
# NOTE(review): despite its name, tracked_insert holds the result of the
# query for the :update operation below.
+ tracked_insert = Sql.get_single_int(
156
+ @adapter.execute <<-EOSQL
157
+ select 1 from #{Db::TABLE_TRACKER_NAME}
158
+ where
159
+ relation_name = '#{test_table}' and
160
+ relation_type = '#{@adapter.relation_type_values[:table]}' and
161
+ operation = '#{@adapter.operation_values[:update]}'
162
+ EOSQL
163
+ )
164
+ tracked_insert.must_equal 1
165
+
166
+ end
167
+ end
168
+
169
+ it "updates the tracking table on truncate of a tracked table" do
170
+ @adapter.with_tracking do
171
+
172
+ @adapter.create_table test_table, nil, '(var1 text)'
173
+ @adapter.truncate_table test_table
174
+ tracked_truncate = Sql.get_single_int(
175
+ @adapter.execute <<-EOSQL
176
+ select 1 from #{Db::TABLE_TRACKER_NAME}
177
+ where
178
+ relation_name = '#{test_table}' and
179
+ relation_type = '#{@adapter.relation_type_values[:table]}' and
180
+ operation = '#{@adapter.operation_values[:truncate]}'
181
+ EOSQL
182
+ )
183
+ tracked_truncate.must_equal 1
184
+
185
+ end
186
+ end
187
+
188
# --- tracking set-up / tear-down state, called directly instead of via with_tracking ---
+ it "says it is tracking tables after tracking is set up" do
189
+ @adapter.tear_down_tracking
190
+ @adapter.set_up_tracking
191
+ @adapter.tracking_tables?.must_equal true
192
+ end
193
+
194
+ it "says it is not tracking tables after tracking is torn down" do
195
+ @adapter.tear_down_tracking
196
+ (@adapter.tracking_tables?).must_equal false
197
+ end
198
+
199
+ end
200
+
201
+ end
202
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,37 @@
1
+ # https://coveralls.io
2
+ require 'coveralls'
3
+ Coveralls.wear!
4
+
5
+ require 'minitest/autorun'
6
+ require 'minitest/around/spec'
7
+ require 'minitest-spec-context'
8
+
9
+ require 'logger'
10
+
11
+ require 'data_task/sql'
12
+ require 'data_task/db'
13
+ require 'data_task/data'
14
+ require 'data_task'
15
+
16
+ require 'data_task/adapters/sqlite'
17
+ require 'data_task/adapters/postgres'
18
+
19
# Build an adapter connected to the database configured for testing.
#
# The configuration section is chosen by the DATATASK_ENV environment
# variable (default 'sqlite_test') from test/config/database.yml, resolved
# relative to the current working directory. The section's 'adapter' value,
# capitalized, names the class under Rake::DataTask to instantiate.
#
# @return [Object] the adapter, extended with TrackingSetupTeardownHelper so
#   each test can set up and tear down tracking cleanly
def get_adapter
  env_name = ENV['DATATASK_ENV'] || 'sqlite_test'
  config = YAML.load_file('test/config/database.yml')[env_name]

  adapter_class = Object.const_get("Rake::DataTask::#{config['adapter'].capitalize}")
  adapter_class.new(config).extend(TrackingSetupTeardownHelper)
end
30
+
31
# Mixed into an adapter by the test helper so that an example can run with
# tracking enabled. The extended object must provide set_up_tracking and
# tear_down_tracking.
module TrackingSetupTeardownHelper
  # Set up tracking, run the block, and always tear tracking down again.
  #
  # The tear-down sits in an `ensure` clause so that a raising block (for
  # example a failed assertion) does not leave tracking in place.
  #
  # @yield the operations to run while tracking is set up
  # @return [Object] the block's return value
  def with_tracking(&ops)
    set_up_tracking
    begin
      ops.call
    ensure
      tear_down_tracking
    end
  end
end