rstore 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +93 -58
- data/lib/rstore/configuration.rb +4 -4
- data/lib/rstore/converter.rb +39 -14
- data/lib/rstore/csv.rb +36 -31
- data/lib/rstore/version.rb +1 -1
- metadata +9 -9
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# RStore
|
1
|
+
# RStore
|
2
2
|
|
3
3
|
### A library for easy batch storage of csv data into a database
|
4
4
|
|
@@ -6,70 +6,100 @@ Uses the CSV standard library for parsing, *Nokogiri* for URL handling, and *Seq
|
|
6
6
|
|
7
7
|
## Special Features
|
8
8
|
|
9
|
-
* **Batch processing** of csv files
|
10
|
-
* Fetches data from different sources: **files, directories, URLs**
|
11
|
-
* **Customizable** using additional options (also see section *Available Options*)
|
12
|
-
* **Validation of field values**. At the moment validation of the following types is supported:
|
13
|
-
* `String`, `Integer`, `Float`, `Date`, `DateTime`, `Time`, and `Boolean`
|
14
|
-
* **Descriptive error messages** pointing helping you to find any invalid data quickly.
|
15
|
-
* Only define your database and table classes once, then just `require` them when needed.
|
16
|
-
* **Safe and transparent data storage**:
|
17
|
-
* Using database transactions: Either the data from all all files is stored or none (also see section *Database Requirements*)
|
9
|
+
* **Batch processing** of csv files
|
10
|
+
* Fetches data from different sources: **files, directories, URLs**
|
11
|
+
* **Customizable** using additional options (also see section *Available Options*)
|
12
|
+
* **Validation of field values**. At the moment validation of the following types is supported:
|
13
|
+
* `String`, `Integer`, `Float`, `Date`, `DateTime`, `Time`, and `Boolean`
|
14
|
+
* **Descriptive error messages** helping you to find any invalid data quickly.
|
15
|
+
* Only define your database and table classes once, then just `require` them when needed.
|
16
|
+
* **Safe and transparent data storage**:
|
17
|
+
* Using database transactions: Either the data from all files is stored or none (also see section *Database Requirements*)
|
18
18
|
* To avoid double entry of data, the `run` method can only be run once on a single instance of `RStore::CSV`.
|
19
19
|
|
20
20
|
|
21
21
|
## Database Requirements
|
22
|
-
|
22
|
+
|
23
23
|
1. Expects the database table to have an additional column storing an auto-incrementing primary key.
|
24
|
-
2. **Requires the database to support transactions**:
|
25
|
-
Most other database platforms support transactions natively.
|
26
|
-
In MySQL, you'll need to be running `InnoDB` or `BDB` table types rather than the more common `MyISAM`.
|
27
|
-
If you are using MySQL and the table has not been created yet, RStore::CSV will take care of using the
|
24
|
+
2. **Requires the database to support transactions**:
|
25
|
+
Most other database platforms support transactions natively.
|
26
|
+
In MySQL, you'll need to be running `InnoDB` or `BDB` table types rather than the more common `MyISAM`.
|
27
|
+
If you are using MySQL and the table has not been created yet, RStore::CSV will take care of using the
|
28
28
|
correct table type upon creation.
|
29
29
|
|
30
30
|
|
31
31
|
## Installation
|
32
32
|
|
33
|
-
```
|
34
|
-
gem install
|
33
|
+
``` bash
|
34
|
+
$ gem install rstore
|
35
|
+
```
|
36
|
+
|
37
|
+
**Note**:
|
38
|
+
As `RStore` depends on [Nokogiri](http://nokogiri.org/) for fetching data from URLs, you need to install Nokogiri first to use this feature.
|
39
|
+
However, on some operating systems there can be problems due to missing libraries,
|
40
|
+
so you might want to take a look at the following installation instructions:
|
41
|
+
|
42
|
+
**Debian**
|
43
|
+
Users of Debian Linux (e.g. Ubuntu) need to run:
|
44
|
+
|
45
|
+
``` bash
|
46
|
+
$ sudo apt-get install libxslt1-dev libxml2-dev
|
47
|
+
|
48
|
+
$ gem install nokogiri
|
49
|
+
|
50
|
+
```
|
51
|
+
|
52
|
+
**Mac OS X**
|
53
|
+
The following instructions should work, but I haven't tested them personally:
|
54
|
+
|
55
|
+
``` bash
|
56
|
+
$ sudo port install libxml2 libxslt
|
57
|
+
|
58
|
+
$ gem install nokogiri
|
59
|
+
|
35
60
|
```
|
36
61
|
|
62
|
+
Source: [Installing Nokogiri](http://nokogiri.org/tutorials/installing_nokogiri.html)
|
63
|
+
|
64
|
+
If you have any difficulties installing Nokogiri, please let me know, so that I can help you.
|
65
|
+
|
66
|
+
|
37
67
|
## Public API Documentation
|
38
68
|
|
39
|
-
|
69
|
+
The documentation is hosted on *RubyDoc.info*: [RStore Public API documentation](http://rubydoc.info/github/bytesource/rstore).
|
40
70
|
|
41
71
|
|
42
72
|
## Sample Usage
|
43
73
|
|
44
74
|
Sample csv file
|
45
75
|
|
46
|
-
> "product","quantity","price","created_at","min_demand","max_demand","on_stock"
|
47
|
-
> "toy1","1","1.12","2011-2-4","1:30","1:30am","true"
|
48
|
-
> "toy2","2","2.22","2012/2/4","2:30","2:30pm","false
|
49
|
-
> "toy3","3","3.33","2013/2/4","3:30","3:30 a.m.","True
|
50
|
-
> "toy4","4",,,"4:30","4:30 p.m.","False"
|
51
|
-
> "toy4","5","5.55","2015-2-4","5:30","5:30AM","1"
|
52
|
-
> "toy5","6","6.66","2016/2/4","6:30","6:30 P.M.","0"
|
53
|
-
> "toy6","7","7.77",,,,"false"
|
76
|
+
> "product","quantity","price","created_at","min_demand","max_demand","on_stock"
|
77
|
+
> "toy1","1","1.12","2011-2-4","1:30","1:30am","true"
|
78
|
+
> "toy2","2","2.22","2012/2/4","2:30","2:30pm","false
|
79
|
+
> "toy3","3","3.33","2013/2/4","3:30","3:30 a.m.","True
|
80
|
+
> "toy4","4",,,"4:30","4:30 p.m.","False"
|
81
|
+
> "toy4","5","5.55","2015-2-4","5:30","5:30AM","1"
|
82
|
+
> "toy5","6","6.66","2016/2/4","6:30","6:30 P.M.","0"
|
83
|
+
> "toy6","7","7.77",,,,"false"
|
54
84
|
|
55
85
|
|
56
86
|
1) Load gem
|
57
87
|
|
58
|
-
``` ruby
|
88
|
+
``` ruby
|
59
89
|
|
60
90
|
require 'rstore/csv'
|
61
91
|
|
62
92
|
```
|
63
|
-
2) Store database information in a subclass of `RStore::BaseDB`
|
93
|
+
2) Store database information in a subclass of `RStore::BaseDB`
|
64
94
|
Naming convention: name => NameDB
|
65
95
|
|
66
|
-
``` ruby
|
96
|
+
``` ruby
|
67
97
|
|
68
98
|
class CompanyDB < RStore::BaseDB
|
69
99
|
|
70
100
|
# Same as Sequel.connect, except that you don't need to
|
71
101
|
# provide the :database key.
|
72
|
-
info(:adapter => 'mysql',
|
102
|
+
info(:adapter => 'mysql',
|
73
103
|
:host => 'localhost',
|
74
104
|
:user => 'root',
|
75
105
|
:password => 'xxx')
|
@@ -78,10 +108,10 @@ end
|
|
78
108
|
|
79
109
|
```
|
80
110
|
|
81
|
-
3) Store table information in a subclass of `RStore::BaseTable`
|
111
|
+
3) Store table information in a subclass of `RStore::BaseTable`
|
82
112
|
Naming convention: name => NameTable
|
83
113
|
|
84
|
-
``` ruby
|
114
|
+
``` ruby
|
85
115
|
|
86
116
|
class ProductsTable < RStore::BaseTable
|
87
117
|
|
@@ -100,16 +130,16 @@ class ProductsTable < RStore::BaseTable
|
|
100
130
|
|
101
131
|
end
|
102
132
|
|
103
|
-
```
|
133
|
+
```
|
104
134
|
|
105
|
-
**Note**:
|
135
|
+
**Note**:
|
106
136
|
You can put the database and table class definitions in separate files
|
107
137
|
and `require` them when needed.
|
108
138
|
|
109
139
|
|
110
|
-
4) Enter csv data into the database
|
111
|
-
The `from` method accepts a path to a file or directory as well as an URL.
|
112
|
-
The `to` metthod accepts a string of the form *db_name.table_name*
|
140
|
+
4) Enter csv data into the database
|
141
|
+
The `from` method accepts a path to a file or directory as well as an URL.
|
142
|
+
The `to` method accepts a string of the form *db_name.table_name*
|
113
143
|
|
114
144
|
```ruby
|
115
145
|
RStore::CSV.new do
|
@@ -122,16 +152,16 @@ end
|
|
122
152
|
|
123
153
|
```
|
124
154
|
### Additional Features
|
125
|
-
---
|
155
|
+
---
|
126
156
|
|
127
157
|
You can change and reset the default options (see section *Available Options* below for details)
|
128
158
|
|
129
|
-
``` ruby
|
159
|
+
``` ruby
|
130
160
|
# Search directories recursively and handle the first row of a file as data by default
|
131
|
-
RStore::CSV.change_default_options(:recursive => true, :has_headers => false)
|
161
|
+
RStore::CSV.change_default_options(:recursive => true, :has_headers => false)
|
132
162
|
|
133
163
|
RStore::CSV.new do
|
134
|
-
from 'dir1'
|
164
|
+
from 'dir1'
|
135
165
|
from 'dir2'
|
136
166
|
from 'dir3'
|
137
167
|
to 'company.products'
|
@@ -143,12 +173,12 @@ RStore::CSV.reset_default_options
|
|
143
173
|
|
144
174
|
```
|
145
175
|
|
146
|
-
There is also a convenience method enabling you to use
|
176
|
+
There is also a convenience method enabling you to use
|
147
177
|
all of [Sequels query methods](http://sequel.rubyforge.org/rdoc/files/doc/querying_rdoc.html).
|
148
178
|
|
149
|
-
``` ruby
|
150
|
-
RStore::CSV.query('company.products') do |table| # table = Sequel::Dataset object
|
151
|
-
table.all # fetch everything
|
179
|
+
``` ruby
|
180
|
+
RStore::CSV.query('company.products') do |table| # table = Sequel::Dataset object
|
181
|
+
table.all # fetch everything
|
152
182
|
table.all[3] # fetch row number 4 (see output below)
|
153
183
|
table.filter(:id => 2).update(:on_stock => true) # update entry
|
154
184
|
table.filter(:id => 3).delete # delete entry
|
@@ -159,7 +189,7 @@ end
|
|
159
189
|
*)
|
160
190
|
Output of `db[table.name].all[3]`
|
161
191
|
|
162
|
-
``` ruby
|
192
|
+
``` ruby
|
163
193
|
# {:product => "toy4",
|
164
194
|
# :quantity => 4,
|
165
195
|
# :price => nil,
|
@@ -170,10 +200,10 @@ Output of `db[table.name].all[3]`
|
|
170
200
|
|
171
201
|
```
|
172
202
|
|
173
|
-
Access all of Sequels functionality by using the convenience methods
|
174
|
-
`BaseDB.connect`, `BaseTable.name`, and `BaseTable.table_info`:
|
203
|
+
Access all of Sequel's functionality by using the convenience methods
|
204
|
+
`BaseDB.connect`, `BaseTable.name`, and `BaseTable.table_info`:
|
175
205
|
|
176
|
-
``` ruby
|
206
|
+
``` ruby
|
177
207
|
|
178
208
|
DB = CompanyDB.connect # Open connection to 'company' database
|
179
209
|
name = ProductTable.name # Table name, :products, used as an argument to the following methods.
|
@@ -198,14 +228,19 @@ The method `from` accepts two kinds of options, file options and parse options:
|
|
198
228
|
### File Options
|
199
229
|
File options are used for fetching csv data from a source. The following options are recognized:
|
200
230
|
|
201
|
-
* **:has_headers**, default: `true`
|
231
|
+
* **:has_headers**, default: `true`
|
202
232
|
* When set to false, the first line of a file is processed as data, otherwise it is discarded.
|
203
|
-
* **:recursive**, default: `false`
|
204
|
-
* When set to true and a directory is given, recursively search for files. Non-csv files are skipped.
|
205
|
-
* **:
|
206
|
-
*
|
207
|
-
|
208
|
-
|
233
|
+
* **:recursive**, default: `false`
|
234
|
+
* When set to true and a directory is given, recursively search for files. Non-csv files are skipped.
|
235
|
+
* **:digit_seps**, default `[',', '.']`
|
236
|
+
* The *thousands separator* and *decimal mark* used for numbers in the data source.
|
237
|
+
Different countries use different thousands separators and decimal marks,
|
238
|
+
and setting this option ensures that parsing of these numbers succeeds.
|
239
|
+
Note that all numbers will still be *stored* in the format that Ruby recognizes, that is with a point (.) as the decimal mark.
|
240
|
+
* **:selector**, default: `""`
|
241
|
+
* Mandatory css selector when fetching data from an URL. For more details please see the section *Further Reading* below
|
242
|
+
|
243
|
+
|
209
244
|
### Parse Options
|
210
245
|
Parse options are arguments to `CSV::parse`. The following options are recognized:
|
211
246
|
|
@@ -223,7 +258,7 @@ Parse options are arguments to `CSV::parse`. The following options are recognize
|
|
223
258
|
For more information on the parse options, please see section *Further Reading* below.
|
224
259
|
|
225
260
|
|
226
|
-
## Further Reading
|
261
|
+
## Further Reading
|
227
262
|
|
228
263
|
* Sequel
|
229
264
|
* [Cheat sheet][sequel_cheat]
|
@@ -240,7 +275,7 @@ For more information on the parse options, please see section *Further Reading*
|
|
240
275
|
[sequel_query]: http://sequel.rubyforge.org/rdoc/files/doc/querying_rdoc.html
|
241
276
|
[csv_options]: http://ruby-doc.org/stdlib-1.9.2/libdoc/csv/rdoc/CSV.html#method-c-new
|
242
277
|
[csv_standard]: http://www.ietf.org/rfc/rfc4180.txt
|
243
|
-
[nokogiri_home]: http://nokogiri.org/
|
278
|
+
[nokogiri_home]: http://nokogiri.org/
|
244
279
|
|
245
280
|
|
246
281
|
## Feedback
|
data/lib/rstore/configuration.rb
CHANGED
@@ -12,7 +12,7 @@ module RStore
|
|
12
12
|
|
13
13
|
|
14
14
|
# Supported options
|
15
|
-
@default_file_options = {recursive: false, has_headers: true, selector: ''}
|
15
|
+
@default_file_options = {recursive: false, has_headers: true, selector: '', digit_seps: [',', '.']}
|
16
16
|
@default_parse_options = {row_sep: :auto, col_sep: ",", quote_char: '"', field_size_limit: nil, skip_blanks: false}.freeze
|
17
17
|
@default_options = @default_file_options.merge(@default_parse_options)
|
18
18
|
|
@@ -59,7 +59,7 @@ module RStore
|
|
59
59
|
acc
|
60
60
|
end
|
61
61
|
|
62
|
-
@options = result
|
62
|
+
@options = result
|
63
63
|
end
|
64
64
|
|
65
65
|
|
@@ -67,7 +67,7 @@ module RStore
|
|
67
67
|
keys = options.keys
|
68
68
|
@options.inject({}) do |acc, (option, value)|
|
69
69
|
if keys.include?(option)
|
70
|
-
acc[option] = value
|
70
|
+
acc[option] = value
|
71
71
|
end
|
72
72
|
|
73
73
|
acc
|
@@ -123,4 +123,4 @@ module RStore
|
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
126
|
-
|
126
|
+
|
data/lib/rstore/converter.rb
CHANGED
@@ -16,26 +16,26 @@ module RStore
|
|
16
16
|
# Will be set to :converted on successfull conversion.
|
17
17
|
attr_accessor :state
|
18
18
|
|
19
|
-
|
19
|
+
|
20
20
|
boolean_converter = lambda do |field|
|
21
21
|
if field.downcase == 'true' || field == '1'
|
22
|
-
return true
|
22
|
+
return true
|
23
23
|
end
|
24
|
-
if field.downcase == 'false' || field == '0'
|
24
|
+
if field.downcase == 'false' || field == '0'
|
25
25
|
return false
|
26
26
|
else
|
27
27
|
raise ArgumentError, "invalid value for Boolean() '#{field}'"
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
-
# Converters used to verify the field data is valid.
|
31
|
+
# Converters used to verify the field data is valid.
|
32
32
|
# If a conversion fails, an exception is thrown together
|
33
|
-
# with a descriptive error message pointing to the field
|
33
|
+
# with a descriptive error message pointing to the field
|
34
34
|
# where the error occured.
|
35
35
|
Converters = Hash.new {|h,k| h[k] = lambda { |field| field }}.
|
36
36
|
merge!({string: lambda { |field| field },
|
37
37
|
date: lambda { |field| Date.parse(field).to_s },
|
38
|
-
datetime: lambda { |field| DateTime.parse(field).to_s },
|
38
|
+
datetime: lambda { |field| DateTime.parse(field).to_s },
|
39
39
|
# Convert to DateTime, because DateTime also checks if the argument is valid
|
40
40
|
time: lambda { |field| DateTime.parse(field).to_s },
|
41
41
|
integer: lambda { |field| Integer(field) },
|
@@ -61,7 +61,7 @@ module RStore
|
|
61
61
|
def extract_from_schema target
|
62
62
|
|
63
63
|
schema = @schema.dup
|
64
|
-
|
64
|
+
|
65
65
|
# Delete primary key column entry
|
66
66
|
schema.delete_if do |(_, property_hash)|
|
67
67
|
property_hash[:primary_key] == true
|
@@ -71,11 +71,33 @@ module RStore
|
|
71
71
|
# Sequel handles Time as Datetime:
|
72
72
|
type = property_hash[target]
|
73
73
|
#type = (type == :time) ? :datetime : type
|
74
|
-
|
74
|
+
type
|
75
75
|
end
|
76
76
|
end
|
77
77
|
|
78
78
|
|
79
|
+
# If the option key :digit_seps is passed,
|
80
|
+
# remove the thousands separator and convert the decimal mark (if present)
|
81
|
+
# into a point (.) that is understood by Ruby.
|
82
|
+
# The purpose of this function is to allow smooth parsing of numbers
|
83
|
+
# written in another standard than used in the US.
|
84
|
+
# Example: 100,000.34 is written as 100.000,34 in Germany.
|
85
|
+
# See also: http://en.wikipedia.org/wiki/Decimal_mark
|
86
|
+
def normalize_digit_separators num_as_string, separators
|
87
|
+
# Test if :digit_seps has been passed as an option
|
88
|
+
return num_as_string if separators.nil?
|
89
|
+
|
90
|
+
thousands_sep = separators[0]
|
91
|
+
decimal_mark = separators[1]
|
92
|
+
|
93
|
+
default_decimal_mark = Configuration.default_file_options[:digit_seps][1]
|
94
|
+
|
95
|
+
# Remove thousands separator first,so that is does not infer with the second step
|
96
|
+
# of replacing the decimal mark with the default.
|
97
|
+
num_as_string.gsub(thousands_sep, '').gsub(decimal_mark, default_decimal_mark)
|
98
|
+
end
|
99
|
+
|
100
|
+
|
79
101
|
# Returns @table with converted fields if no error is thrown, nil otherwise
|
80
102
|
def convert
|
81
103
|
content = @data.content.dup
|
@@ -84,19 +106,19 @@ module RStore
|
|
84
106
|
|
85
107
|
convert_row(row, row_index)
|
86
108
|
end
|
87
|
-
@state = :converted
|
109
|
+
@state = :converted
|
88
110
|
Data.new(@data.path, converted, @state, @data.options)
|
89
111
|
end
|
90
112
|
|
91
113
|
|
92
|
-
|
114
|
+
|
93
115
|
def convert_row row, row_index
|
94
|
-
# CSV.parse adjusts the size of each row to equal the size of the longest row
|
116
|
+
# CSV.parse adjusts the size of each row to equal the size of the longest row
|
95
117
|
# by adding nil where necessary.
|
96
118
|
error_message = <<-ERROR.gsub(/^\s+/,'')
|
97
119
|
Row length does not match number of columns. Please verify that:
|
98
120
|
1. The database table fits the csv table data
|
99
|
-
2. There is no primary key on a data column (you always need to
|
121
|
+
2. There is no primary key on a data column (you always need to
|
100
122
|
define a separate column for an auto-incrementing primary key)
|
101
123
|
ERROR
|
102
124
|
|
@@ -130,7 +152,10 @@ module RStore
|
|
130
152
|
|
131
153
|
|
132
154
|
def convert_type column_type, field
|
133
|
-
|
155
|
+
types = [:float, :bigdecimal, :integer, :numeric]
|
156
|
+
value = types.include?(column_type) ? normalize_digit_separators(field, @data.options[:digit_seps]) : field
|
157
|
+
|
158
|
+
Converters[column_type][value]
|
134
159
|
end
|
135
160
|
|
136
161
|
|
@@ -141,4 +166,4 @@ module RStore
|
|
141
166
|
|
142
167
|
end
|
143
168
|
end
|
144
|
-
|
169
|
+
|
data/lib/rstore/csv.rb
CHANGED
@@ -14,19 +14,19 @@ module RStore
|
|
14
14
|
|
15
15
|
#@return [BaseDB] a subclass of {RStore::BaseDB}
|
16
16
|
attr_reader :database
|
17
|
-
#@return [BaseTable] a sublcass of {RStore::BaseTable}
|
17
|
+
#@return [BaseTable] a sublcass of {RStore::BaseTable}
|
18
18
|
attr_reader :table
|
19
19
|
#@return [Array<Data>] holds `RStore::Data` objects that are used internally to store information from a data source.
|
20
20
|
attr_reader :data_array
|
21
|
-
|
22
21
|
|
23
|
-
|
24
|
-
#
|
22
|
+
|
23
|
+
# This constructor takes a block yielding an implicit instance of _self_.
|
24
|
+
# Within the block, the following methods need to be called:
|
25
25
|
#
|
26
26
|
# * {#from}
|
27
27
|
# * {#to}
|
28
28
|
# * {#run}
|
29
|
-
# @example
|
29
|
+
# @example
|
30
30
|
# RStore::CSV.new do
|
31
31
|
# from '../easter/children', :recursive => true # select a directory or
|
32
32
|
# from '../christmas/children/toys.csv' # file, or
|
@@ -50,26 +50,31 @@ module RStore
|
|
50
50
|
end
|
51
51
|
|
52
52
|
|
53
|
-
# Specify the source of the csv file(s)
|
53
|
+
# Specify the source of the csv file(s)
|
54
54
|
# There can be several calls to this method on given instance of `RStore::CSV`.
|
55
55
|
# This method has to be called before {#run}.
|
56
56
|
# @overload from(source, options)
|
57
57
|
# @param [String] source The relative or full path to a directory, file, or an URL
|
58
|
-
# @param [Hash] options The options used to customize fetching and parsing of csv data
|
59
|
-
# @option options [Boolean] :has_headers When set to false, the first line of a file is processed as data, otherwise it is discarded.
|
58
|
+
# @param [Hash] options The options used to customize fetching and parsing of csv data
|
59
|
+
# @option options [Boolean] :has_headers When set to false, the first line of a file is processed as data, otherwise it is discarded.
|
60
60
|
# (default: `true`)
|
61
|
-
# @option options [Boolean] :recursive When set to true and a directory is given, recursively search for files. Non-csv files are skipped.
|
61
|
+
# @option options [Boolean] :recursive When set to true and a directory is given, recursively search for files. Non-csv files are skipped.
|
62
62
|
# (default: `false`]
|
63
|
-
# @option options [String] :selector Mandatory css selector
|
63
|
+
# @option options [String] :selector Mandatory css selector when fetching data from an URL. Uses the same syntax as {http://nokogiri.org/ Nokogiri}, default: `""`
|
64
64
|
# @option options [String] :col_sep The String placed between each field. (default: `","`)
|
65
|
-
# @option options [String, Symbol] :row_sep The String appended to the end of each row.
|
65
|
+
# @option options [String, Symbol] :row_sep The String appended to the end of each row.
|
66
66
|
# (default: `:auto`)
|
67
67
|
# @option options [String] :quote_car The character used to quote fields.
|
68
68
|
# (default: `'"'`)
|
69
69
|
# @option options [Integer, Nil] :field_size_limit The maximum size CSV will read ahead looking for the closing quote for a field.
|
70
70
|
# (default: `nil`)
|
71
71
|
# @option options [Boolean] :skip_blanks When set to a true value, CSV will skip over any rows with no content.
|
72
|
-
# (default: `false`)
|
72
|
+
# (default: `false`)
|
73
|
+
# @option options [Array] :digit_seps The *thousands separator* and *decimal mark* used for numbers in the data source
|
74
|
+
# (default: `[',', '.']`).
|
75
|
+
# Different countries use different thousands separators and decimal marks, and setting this options ensures that
|
76
|
+
# parsing of these numbers succeeds. Note that all numbers will still be *stored* in the format that Ruby recognizes,
|
77
|
+
# that is with a point (.) as the decimal mark.
|
73
78
|
# @overload from(source)
|
74
79
|
# @param [String] source The relative or full path to a directory, file, or an URL. The default options will be used.
|
75
80
|
# @return [void]
|
@@ -90,7 +95,7 @@ module RStore
|
|
90
95
|
|
91
96
|
# Choose the database table to store the csv data into.
|
92
97
|
# This method has to be called before {#run}.
|
93
|
-
# @param [String] db_table The names of the database and table, separated by a dot, e.g. 'database.table'.
|
98
|
+
# @param [String] db_table The names of the database and table, separated by a dot, e.g. 'database.table'.
|
94
99
|
# The name of the database has to correspond to a subclass of `RStore::BaseDB`:
|
95
100
|
# CompanyDB < RStore::BaseDB -> 'company'
|
96
101
|
# The name of the table has to correspond to a subclass of `RStore::BaseTable`:
|
@@ -125,13 +130,13 @@ module RStore
|
|
125
130
|
# Both methods, {#from} and {#to}, have to be called before this method.
|
126
131
|
# @return [void]
|
127
132
|
def run
|
128
|
-
return if ran_once? # Ignore subsequent calls to #run
|
133
|
+
return if ran_once? # Ignore subsequent calls to #run
|
129
134
|
raise Exception, "At least one method 'from' has to be called before method 'run'" unless @from == true
|
130
135
|
raise Exception, "Method 'to' has to be called before method 'run'" unless @to == true
|
131
136
|
|
132
137
|
@data_hash.each do |path, data|
|
133
138
|
content = read_data(data)
|
134
|
-
@data_array << Data.new(path, content, :raw, data.options)
|
139
|
+
@data_array << Data.new(path, content, :raw, data.options)
|
135
140
|
end
|
136
141
|
|
137
142
|
@database.connect do |db|
|
@@ -191,7 +196,7 @@ module RStore
|
|
191
196
|
end
|
192
197
|
else
|
193
198
|
content = File.read(path)
|
194
|
-
end
|
199
|
+
end
|
195
200
|
|
196
201
|
raise ArgumentError, "Empty content!" if content.empty?
|
197
202
|
|
@@ -200,7 +205,7 @@ module RStore
|
|
200
205
|
logger.log(:fetch, e)
|
201
206
|
logger.error
|
202
207
|
end
|
203
|
-
|
208
|
+
|
204
209
|
content
|
205
210
|
end
|
206
211
|
|
@@ -215,7 +220,7 @@ module RStore
|
|
215
220
|
# http://sequel.rubyforge.org/rdoc/files/doc/release_notes/2_10_0_txt.html
|
216
221
|
Sequel::MySQL.default_engine = 'InnoDB'
|
217
222
|
# http://stackoverflow.com/questions/1671401/unable-to-output-mysql-tables-which-involve-dates-in-sequel
|
218
|
-
Sequel::MySQL.convert_invalid_date_time = nil
|
223
|
+
Sequel::MySQL.convert_invalid_date_time = nil
|
219
224
|
end
|
220
225
|
end
|
221
226
|
|
@@ -231,9 +236,9 @@ module RStore
|
|
231
236
|
# @return [void]
|
232
237
|
# @yieldparam [Sequel::Dataset] table The dataset of your table
|
233
238
|
# @example
|
234
|
-
# RStore::CSV.query('company.products') do |table| # table = Sequel::Dataset object
|
235
|
-
# table.all # fetch everything
|
236
|
-
# table.all[3] # fetch row number 4
|
239
|
+
# RStore::CSV.query('company.products') do |table| # table = Sequel::Dataset object
|
240
|
+
# table.all # fetch everything
|
241
|
+
# table.all[3] # fetch row number 4
|
237
242
|
# table.filter(:id => 2).update(:on_stock => true) # update entry
|
238
243
|
# table.filter(:id => 3).delete # delete entry
|
239
244
|
# end
|
@@ -244,8 +249,8 @@ module RStore
|
|
244
249
|
end
|
245
250
|
end
|
246
251
|
|
247
|
-
|
248
|
-
|
252
|
+
|
253
|
+
|
249
254
|
#@private
|
250
255
|
def self.delimiter_correct? name
|
251
256
|
!!(name =~ /^[^\.]+\.[^\.]+$/)
|
@@ -258,11 +263,11 @@ module RStore
|
|
258
263
|
end
|
259
264
|
|
260
265
|
|
261
|
-
# Change default options recognized by {#from}
|
262
|
-
# The new option values apply to all following instances of `RStore::CSV`
|
263
|
-
# Options can be reset to their defaults by calling {.reset_default_options}
|
264
|
-
# See {#from} for a list of all options and their default values.
|
265
|
-
# @param [Hash] options Keys from default options with their respective new values.
|
266
|
+
# Change default options recognized by {#from}
|
267
|
+
# The new option values apply to all following instances of `RStore::CSV`
|
268
|
+
# Options can be reset to their defaults by calling {.reset_default_options}
|
269
|
+
# See {#from} for a list of all options and their default values.
|
270
|
+
# @param [Hash] options Keys from default options with their respective new values.
|
266
271
|
# @return [void]
|
267
272
|
# @example
|
268
273
|
# # Search directories recursively and handle the first row of a file as data by default
|
@@ -271,11 +276,11 @@ module RStore
|
|
271
276
|
Configuration.change_default_options(options)
|
272
277
|
end
|
273
278
|
|
274
|
-
|
279
|
+
|
275
280
|
|
276
281
|
# Reset the options recognized by {#from} to their default values.
|
277
282
|
# @return [void]
|
278
|
-
# @example
|
283
|
+
# @example
|
279
284
|
# RStore::CSV.reset_default_options
|
280
285
|
def self.reset_default_options
|
281
286
|
Configuration.reset_default_options
|
@@ -285,4 +290,4 @@ module RStore
|
|
285
290
|
end
|
286
291
|
end
|
287
292
|
|
288
|
-
|
293
|
+
|
data/lib/rstore/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rstore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-12-08 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &8958500 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *8958500
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &8957280 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,9 +32,9 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
-
description: ! " RStore makes batch processing of csv files a breeze
|
37
|
-
fetches data files, directories, URLs\n :: Customizable using additional options
|
35
|
+
version_requirements: *8957280
|
36
|
+
description: ! " RStore makes batch processing of csv files a breeze.\n Automatically
|
37
|
+
fetches data files, directories, URLs\n :: Customizable using additional options\n
|
38
38
|
\ :: Validation of field values\n :: Descriptive error messages\n :: Safe and
|
39
39
|
transparent data storage using database transactions\n"
|
40
40
|
email: stefan.rohlfing@gmail.com
|
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
84
|
rubyforge_project: rstore
|
85
|
-
rubygems_version: 1.8.
|
85
|
+
rubygems_version: 1.8.11
|
86
86
|
signing_key:
|
87
87
|
specification_version: 3
|
88
88
|
summary: RStore - A library for easy batch storage of csv data into a database
|