rstore 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
1
+ # encoding: utf-8
2
+
3
+ require 'csv'
4
+ require 'rstore/converter'
5
+ require 'rstore/storage'
6
+ require 'rstore/core_ext/object'
7
+ require 'rstore/core_ext/csv_wrapper'
8
+
9
module RStore
  # Holds the content of a single data source together with its path, its
  # processing state and the options that apply to it.
  class Data

    attr_reader :path
    attr_reader :content
    attr_reader :state
    attr_reader :options


    # States accepted by #state= (and therefore by #initialize).
    KnownStates = [:raw, :parsed, :converted, :error]


    # path    - identifier of the data source; echoed in error messages.
    # content - the payload in whatever form matches +state+.
    # state   - one of KnownStates.
    # options - Hash; #parse_csv reads its :file_options and :parse_options keys.
    #
    # Raises ArgumentError if +options+ is not a Hash or +state+ is unknown.
    def initialize path, content, state, options
      unless options.is_a?(Hash)
        raise ArgumentError, "#{path}: The following options are not valid as an argument to #{self.class}:\n#{options}"
      end

      @path      = path
      @content   = content
      self.state = state
      @options   = options
    end


    #def extract_type path
    #  path, filename = File.split(path)
    #  filename.match(/\.(?<type>.*)$/)[:type].to_sym
    #end

    # Parses the raw content with CSVWrapper.parse and returns a NEW Data
    # object in state :parsed holding the parsed rows. The first row is dropped
    # when options[:file_options][:has_headers] is exactly +true+.
    #
    # Note that the receiver's own state is switched to :parsed as well, so a
    # second call on the same object raises InvalidStateError.
    #
    # Raises InvalidStateError unless the current state is :raw.
    # Any error raised while parsing is reported through Logger, whose #error
    # raises FileProcessingError.
    def parse_csv
      raise InvalidStateError, "#{state.inspect} is not a valid Data state for method 'parse_csv'" unless state == :raw

      file_options  = @options[:file_options]
      parse_options = @options[:parse_options]

      begin
        csv = CSVWrapper.parse(@content, parse_options)
        csv = csv.drop(1) if file_options[:has_headers] == true  # drop the first row if it is a header
      rescue => e
        # Logger is built from a data object (it reads #path and #options);
        # same log-then-raise sequence as Storage#insert and FileCrawler.
        logger = Logger.new(self)
        logger.log(:parse, e)
        logger.error
      end

      @state = :parsed
      Data.new(@path, csv, @state, @options)
    end


    # Hands this object to Converter and returns the result of Converter#convert.
    def convert_fields database, table_name
      converter = Converter.new(self, database, table_name)
      converter.convert
    end


    # Hands this object to Storage and returns the result of Storage#insert.
    def into_db database, table_name
      Storage.new(self, database, table_name).insert
    end


    # Sets the processing state.
    #
    # Raises ArgumentError if +state+ is not one of KnownStates.
    def state= state
      unless KnownStates.include?(state)
        raise ArgumentError, "#{state.inspect} is not a valid state. The following states are valid: #{print_valid_states}"
      end

      @state = state
    end

    # Helper methods --------------------------------

    # Returns the known states as a comma separated list of inspected symbols.
    def print_valid_states
      KnownStates.map { |s| s.inspect }.join(', ')
    end

  end
end
80
+
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
+
3
module RStore
  # Raised when the length of a row does not fit the number of columns in
  # the db table.
  InvalidRowLengthError = Class.new(StandardError)

  # NOTE(review): presumably raised when a nil value meets a column that
  # does not allow NULL; not raised anywhere in the visible code, confirm.
  NullNotAllowedError = Class.new(StandardError)

  # Raised when an object is not in the processing state an operation requires.
  InvalidStateError = Class.new(StandardError)

  # Raised by Logger#error, carrying the formatted error report as message.
  FileProcessingError = Class.new(StandardError)
end
11
+
@@ -0,0 +1,135 @@
1
+ # encoding: utf-8
2
+
3
+ require 'open-uri'
4
+ require 'rstore/configuration'
5
+ require 'rstore/data'
6
+ require 'rstore/core_ext/string'
7
+
8
module RStore
  # Collects the data sources (a single file, the files of a directory, or a
  # URL) named by a path and wraps each of them in a Data object in state :raw.
  class FileCrawler

    #attr_reader :file_options_hash
    attr_reader :data_hash

    attr_reader :file_options, :parse_options
    attr_reader :path
    attr_reader :file_paths, :file_type
    attr_reader :config


    # file_or_folder - path of a file, a directory, or a URL.
    # file_type      - file extension (without the dot) to look for.
    # options        - passed on to Configuration.new.
    #
    # The three assignments at the end depend on each other and must stay in
    # this order: paths -> per-path options -> per-path Data objects.
    def initialize file_or_folder, file_type, options={}
      @path          = file_or_folder
      @file_type     = file_type
      @config        = Configuration.new(file_or_folder, options)
      @file_options  = @config.file_options
      @parse_options = @config.parse_options
      self.file_paths        = @path
      self.file_options_hash = @file_paths
      self.data_hash         = @file_options_hash
    end


    # Resolves +path+ into a list of absolute file paths (or a verified URL)
    # and stores it in @file_paths. Does nothing if @file_paths is already set.
    #
    # Any StandardError raised on the way is reported through Logger, whose
    # #error raises FileProcessingError.
    def file_paths= path
      return @file_paths unless @file_paths.nil?

      @file_paths = []
      files       = []
      if path.url?
        return @file_paths << verify_and_format_url(path)
      elsif File.directory?(File.expand_path(path))  # Directory
        # chdir into the same expanded path that was tested above, so that
        # paths such as '~/data' work in both places.
        Dir.chdir(File.expand_path(path)) do
          parse_directory(@file_options[:recursive]).each do |f|
            files << File.expand_path(f)
          end
        end
      else  # Either a file or a non-existing directory path
        file = File.expand_path(path)
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        raise ArgumentError, "'#{path}' is not a valid path" unless File.exist?(file)

        error_message = <<-MESSAGE.gsub(/^\s+/,'')
          Not a #{@file_type} file.
          NOTE: Non-#{@file_type} files in a directory path
          are silently skipped WITHOUT raising an exception
        MESSAGE

        raise ArgumentError, error_message unless can_read?(path)

        files << file
      end

      @file_paths = files
    rescue StandardError => e
      # Dirty hack to be able to instantiate Logger, which needs a data object.
      data = Data.new(path, '', :raw, Configuration.default_options)

      logger = Logger.new(data)
      logger.log(:fetch, e)
      logger.error
    end


    # Builds @data_hash: one Data object in state :raw (with empty content)
    # per path, keyed by path.
    def data_hash= options_hash
      pairs = options_hash.map do |path, options|
        [path, Data.new(path, '', :raw, options)]
      end
      @data_hash = Hash[pairs]
    end


    # Builds @file_options_hash, mapping every path to a Hash with the keys
    # :file_options and :parse_options. Does nothing if it is already set.
    def file_options_hash= file_paths
      return @file_options_hash unless @file_options_hash.nil?

      hash = Hash.new { |outer, path| outer[path] = Hash.new { |inner, key| inner[key] = nil } }
      file_paths.each do |path|
        hash[path][:file_options]  = @file_options
        hash[path][:parse_options] = @parse_options
      end
      @file_options_hash = hash
    end


    # Returns the files of type @file_type below the current working
    # directory; descends into sub directories when +option+ is truthy.
    # Directories whose name happens to match the pattern are left out.
    def parse_directory option
      pattern = option ? "**/*.{#{@file_type}}" : "*.{#{@file_type}}"
      Dir.glob(pattern).reject { |file| File.directory?(file) }
    end


    # Helper methods ---------------------------


    # Returns true if +path+ ends with '.<file_type>'.
    def can_read? path
      !!(/.*\.#{@file_type.to_s}$/ =~ path)
    end


    # Tries to open +url+ and returns the address that could be opened.
    # Addresses starting with 'www' are retried with 'http://' prepended,
    # addresses starting with 'http:' are retried once with 'https:'.
    #
    # Raises ArgumentError (quoting the original +url+) if nothing works.
    def verify_and_format_url url
      address = url
      begin  # additional 'begin' block so that the original, unchanged url can be used in the error message.
        # Kernel#open only handles URLs before Ruby 3.0; URI.open is the
        # open-uri entry point where it exists.
        stream = URI.respond_to?(:open) ? URI.open(address) : open(address)
        stream.close if stream.respond_to?(:close)  # only reachability matters here
        address
      rescue
        case address
        when /^www/    # open-uri does not recognize URLs starting with 'www'
          address = 'http://' + address
          retry
        when /^http:/  # open-uri does not redirect from http to https on a valid https URL
          # Replace the scheme only, not every 'http' inside the address.
          address = address.sub(/^http:/, 'https:')
          retry
        else
          raise ArgumentError, "Could not connect to #{url}. Please check if this URL is correct."
        end
      end
    end

  end
end
@@ -0,0 +1,104 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rstore/exceptions'
4
+
5
module RStore
  # Builds a human readable error report for a data object and raises it as
  # FileProcessingError.
  class Logger

    attr_accessor :data
    attr_accessor :message


    # Processing steps an error can be logged for, with the wording used in
    # the report.
    KnownStates =
      {:fetch   => "loading files",
       :parse   => "parsing file content",
       :convert => "converting field values into their corresponding datatypes",
       :store   => "storing file content into database"}


    # data_object - object responding to #path and #options.
    def initialize data_object
      @data    = data_object
      @message = ''
    end


    # Composes the error report and stores it in #message.
    #
    # state - one of the keys of KnownStates.
    # error - the exception that was rescued.
    # loc   - optional Hash with the zero-based :row (and :col) index at which
    #         the error occurred; converted to one-based values for the report.
    #
    # Raises ArgumentError if +state+ is unknown.
    def log state, error, loc={}
      raise ArgumentError, "#{state} is an invalid state for #{self.class}" unless valid_state? state

      loc = correct_location(loc)

      type_of_error = error.class
      error_message = error.to_s
      location      = loc.empty? ? '' : "Location : #{location_to_s(loc)}"

      # gsub strips the heredoc indentation; it also swallows the empty line
      # that an empty +location+ leaves behind.
      report = <<-TEXT.gsub(/^\s+/, '')
        An error occured while #{KnownStates[state]}:
        File : #{@data.path}
        Type of error: #{type_of_error}
        Error message: #{error_message}
        #{location}
        =============
        Please fix the error and run again.
        NOTE: No data has been inserted into the database yet.
        =============
      TEXT

      @message = report
    end


    # Raises FileProcessingError with the report composed by #log.
    def error
      raise FileProcessingError, @message
    end


    # Helper methods ------------------------

    # Formats a location Hash as "row 3, col 2".
    def location_to_s location
      location.map { |loc,val| "#{loc} #{val}" }.join(', ')
    end


    # Converts zero-based :row/:col indices into the one-based numbers shown
    # to the user. A location without :row is returned unchanged.
    def correct_location location
      return location unless location[:row]  # row_index

      corrected = {row: correct_row(location[:row])}
      corrected[:col] = location[:col]+1 if location[:col]  # col_index
      corrected
    end


    # row = row_index, which starts at 0
    # Without headers: add 1 to row
    # With headers   : add another 1 to row as the header row had been already removed
    def correct_row row
      with_headers? ? row+2 : row+1
    end


    def valid_state? state
      KnownStates.key?(state)
    end


    # Whether the file has a header row. Data#parse_csv reads this flag from
    # options[:file_options][:has_headers], so look there first and fall back
    # to a top-level :has_headers key.
    def with_headers?
      options      = @data.options
      file_options = options[:file_options]
      if file_options.is_a?(Hash) && file_options.key?(:has_headers)
        file_options[:has_headers]
      else
        options[:has_headers]
      end
    end

  end
end
101
+
102
+
103
+
104
+
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
module RStore
  module HelperMethods

    # Calculate the primary key from a schema.
    #
    # schema - collection of [column_name, column_properties] pairs.
    #
    # Returns the name of the first column whose :primary_key property is
    # exactly +true+, or nil if there is none.
    def p_key schema
      key_columns = schema.each_with_object([]) do |(col_name, col_properties), found|
        found << col_name if col_properties[:primary_key] == true
      end
      key_columns.compact.first
    end

  end
end
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
+
3
+ require 'sequel'
4
+ require 'rstore/data'
5
+ require 'rstore/logger'
6
+ require 'rstore/exceptions'
7
+ require 'rstore/modules/helper_methods'
8
+
9
module RStore
  # Inserts the rows of a converted data object into a database table.
  class Storage
    include HelperMethods

    attr_accessor :data, :db, :table, :prepared_data, :primary_key
    attr_accessor :state


    # data_object - object in state :converted; its #content is mapped over
    #               row by row.
    # database    - object responding to #schema, #[] and #transaction.
    #               NOTE(review): presumably a Sequel::Database, confirm.
    # table_name  - name of the target table.
    #
    # Raises InvalidStateError unless the data object's state is :converted.
    def initialize data_object, database, table_name
      state = data_object.state
      raise InvalidStateError, "#{state.inspect} is not a valid state on initialization for class Storage" unless state == :converted
      @state         = state
      @data          = data_object.clone
      @db            = database
      @table         = table_name
      @schema        = @db.schema(@table)
      @primary_key   = p_key @schema
      @prepared_data = prepare_data
    end


    # Returns the column names of the schema without the primary key column.
    def column_names
      @schema.map do |(col_name, _col_properties)|
        col_name unless col_name == @primary_key
      end.compact
    end


    # Turns every row of the data content into a Hash keyed by column name.
    def prepare_data
      col_names = column_names
      @data.content.map do |row|
        Hash[col_names.zip(row)]
      end
    end


    # Inserts all prepared rows inside one database transaction and returns
    # the new state :stored.
    #
    # Errors are reported through Logger together with the index of the row
    # that was being inserted; Logger#error then raises FileProcessingError.
    # Only StandardError is rescued so that interrupts and exit requests are
    # not turned into a FileProcessingError.
    def insert
      dataset    = @db[@table]
      @row_index = nil
      begin
        @db.transaction do
          @prepared_data.each_with_index do |row, row_index|
            @row_index = row_index
            dataset.insert(row)
            # Sequel often only throws an exception when retrieving an incorrect record,
            # The following therefore is to catch invalid data of data types that are
            # not checked by RStore::Converter
            dataset.order(@primary_key).last
          end
        end
      rescue StandardError => e
        # No row index is known if the error occurred before the first row.
        location = @row_index.nil? ? {} : {row: @row_index}
        logger = Logger.new(@data)
        logger.log(:store, e, location)
        logger.error
      end
      @state = :stored
      @state
    end
  end
end
69
+
70
+
71
+
@@ -0,0 +1,3 @@
1
module RStore
  # Version string of the rstore gem.
  VERSION = '0.2.0'
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rstore
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Stefan Rohlfing
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-10-27 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &17766760 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *17766760
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &17766300 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *17766300
36
+ description: ! '+ Batch processing of csv files
37
+
38
+ + Fetches data from different sources: files, directories, URLs
39
+
40
+ + Customizable using additional options 
41
+
42
+ + Validation of field values. At the moment validation of the following types is
43
+ supported
44
+
45
+ + Descriptive error messages helping you to find any invalid data quickly
46
+
47
+ + Safe and transparent data storage:
48
+
49
+ + -- Using database transactions: Either the data from all files is stored or none
50
+
51
+ + -- The data storage method can only be executed once for every instance of RStore::CSV
52
+
53
+ '
54
+ email: stefan.rohlfing@gmail.com
55
+ executables: []
56
+ extensions: []
57
+ extra_rdoc_files: []
58
+ files:
59
+ - lib/rstore/version.rb
60
+ - lib/rstore/csv.rb
61
+ - lib/rstore/configuration.rb
62
+ - lib/rstore/converter.rb
63
+ - lib/rstore/base_db.rb
64
+ - lib/rstore/data.rb
65
+ - lib/rstore/file_crawler.rb
66
+ - lib/rstore/logger.rb
67
+ - lib/rstore/storage.rb
68
+ - lib/rstore/base_table.rb
69
+ - lib/rstore/core_ext/hash.rb
70
+ - lib/rstore/core_ext/csv_wrapper.rb
71
+ - lib/rstore/core_ext/string.rb
72
+ - lib/rstore/core_ext/object.rb
73
+ - lib/rstore/exceptions.rb
74
+ - lib/rstore/modules/helper_methods.rb
75
+ - lib/rstore.rb
76
+ - README.md
77
+ - Rakefile
78
+ - LICENSE
79
+ homepage: http://github.com/bytesource/rstore
80
+ licenses: []
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: 1.9.1
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project: rstore
99
+ rubygems_version: 1.8.10
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: RStore - A library for easy batch storage of csv data into a database
103
+ test_files: []