csv_orm 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -9
- data/csv_orm-0.1.0.gem +0 -0
- data/lib/csv_orm/ingestor.rb +14 -2
- data/lib/csv_orm/query.rb +54 -2
- data/lib/csv_orm/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40f766dd6da056cc5ab425d390c33a86783b1c1f
|
4
|
+
data.tar.gz: da13ec654fac0cfddb2375f274f6f0f11f099ed4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aba4290e99c36ebd87b7cf0748cc25deb18c278e33346c6e71717bf6496501ca286f6ee2b733efbb05ae6e1c837320af21b92d96a806a6ed3fc7286933005aa0
|
7
|
+
data.tar.gz: 10d975ca6d8985ee36e044415ad1a95604ea6093cfbd973536beba0d9e98d10cabed1fa3b623aad709a9e6e632ec582d807996637b46bda686d5862fd14aa4ac
|
data/README.md
CHANGED
@@ -28,25 +28,42 @@ Or install it yourself as:
|
|
28
28
|
Now you can run ActiveRecord-like queries on the dataset. Currently it supports 3 methods:
|
29
29
|
|
30
30
|
```ruby
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#aggregate(:field1, :field2)
|
31
|
+
#where({key: 'value', other_key: 'other_value'}) (all conditions must be met)
|
32
|
+
#where_any({key: 'value', other_key: 'other_value'}) (like an 'or' condition)
|
33
|
+
#aggregate(:field1, :field2) (count unique values for each field)
|
34
34
|
```
|
35
35
|
|
36
36
|
```ruby
|
37
|
-
# show users who have any admin access
|
38
|
-
my_data.where_any({admin: true, super_admin: true})
|
39
|
-
|
40
37
|
# show users who are admin and named 'Mike'
|
41
|
-
my_data.
|
38
|
+
my_data.where({admin: true, first_name: 'Mike'})
|
39
|
+
|
40
|
+
# show users who have admin access or whose last name starts with N-Z
|
41
|
+
my_data.where_any({admin: true, last_name: /^[n-zN-Z].*/})
|
42
42
|
|
43
43
|
# give me a breakdown of orders by their delivery status for users named 'Mike'
|
44
|
-
my_data.
|
45
|
-
#=> {
|
44
|
+
my_data.where({first_name: 'Mike'}).aggregate(:delivery_status, :admin)
|
45
|
+
#=> {
|
46
|
+
delivery_status: {placed: 10, processing: 22, shipped: 43, delivered: 25},
|
47
|
+
admin: { true: 45, false: 55 }
|
48
|
+
}
|
46
49
|
```
|
47
50
|
|
48
51
|
Maybe more will come...
|
49
52
|
|
53
|
+
Ok a little more...
|
54
|
+
|
55
|
+
Now supporting an option: `{smart: false}`. By default, time fields are converted internally on ingestion, so a string like `'2018-01-01'` becomes `1514764800`. Pass `{smart: false}` to skip this conversion and keep the fields as strings.
|
56
|
+
|
57
|
+
Also supporting multiple data types as queryable criteria. You can now do...
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
data.where({string_field: 'foo'}) # exact match
|
61
|
+
data.where({string_field: /\d+/}) # pattern match
|
62
|
+
data.where({array_field: ['a', 'b']}) # is one of
|
63
|
+
data.where({range_field: (start_day..end_day)}) # is within range, supports numbers/dates
|
64
|
+
```
|
65
|
+
|
66
|
+
|
50
67
|
## Development
|
51
68
|
|
52
69
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/csv_orm-0.1.0.gem
ADDED
Binary file
|
data/lib/csv_orm/ingestor.rb
CHANGED
@@ -2,12 +2,13 @@ module CsvOrm
|
|
2
2
|
class Ingestor
|
3
3
|
attr_accessor :file, :path, :headers, :headers_defined, :data_set
|
4
4
|
|
5
|
-
def initialize(file_path)
|
5
|
+
def initialize(file_path, options={})
|
6
6
|
@path = File.expand_path(file_path)
|
7
7
|
@file = File.open(path)
|
8
8
|
@headers = [] # will define in first iteration of loop
|
9
9
|
@headers_defined = false
|
10
10
|
@data_set = []
|
11
|
+
@smart = options[:smart] == false ? false : true
|
11
12
|
end
|
12
13
|
|
13
14
|
def parse
|
@@ -15,10 +16,21 @@ module CsvOrm
|
|
15
16
|
unless @headers_defined
|
16
17
|
@headers = row.map {|header| header.gsub(' ', '_').downcase.to_sym }
|
17
18
|
end
|
18
|
-
|
19
|
+
parsed_row = row.map {|field| infer_data_type(field) }
|
20
|
+
@data_set << OpenStruct.new(Hash[headers.zip(parsed_row)]) if @headers_defined
|
19
21
|
@headers_defined = true
|
20
22
|
end
|
21
23
|
@data_set
|
22
24
|
end
|
25
|
+
|
26
|
+
def infer_data_type(field)
|
27
|
+
# currently supporting time to integer conversion
|
28
|
+
return field.to_s unless @smart
|
29
|
+
date = DateTime.parse(field) rescue nil
|
30
|
+
if date
|
31
|
+
return date.to_time.to_i
|
32
|
+
end
|
33
|
+
field.to_s
|
34
|
+
end
|
23
35
|
end
|
24
36
|
end
|
data/lib/csv_orm/query.rb
CHANGED
@@ -22,18 +22,70 @@ module CsvOrm
|
|
22
22
|
agg
|
23
23
|
end
|
24
24
|
|
25
|
+
def explain(method, args)
|
26
|
+
case
|
27
|
+
when method == :where
|
28
|
+
build_expression('&&', args)
|
29
|
+
when method == :where_any
|
30
|
+
build_expression('||', args)
|
31
|
+
else
|
32
|
+
'not supported at this time'
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
25
36
|
def where_any(attrs)
|
26
37
|
expression = build_expression('||', attrs)
|
27
38
|
self.class.new(@data.select {|row| eval(expression)});
|
28
39
|
end
|
29
40
|
|
30
|
-
def
|
41
|
+
def where(attrs)
|
31
42
|
expression = build_expression('&&', attrs)
|
32
43
|
self.class.new(@data.select {|row| eval(expression)});
|
33
44
|
end
|
34
45
|
|
46
|
+
def not(attrs)
|
47
|
+
expression = build_expression('&&', attrs)
|
48
|
+
self.class.new(@data.reject {|row| eval(expression)});
|
49
|
+
end
|
50
|
+
|
51
|
+
def parse_range_values(range)
|
52
|
+
exclude = range.exclude_end?
|
53
|
+
first, last = DateTime.parse(range.begin), DateTime.parse(range.end) rescue nil
|
54
|
+
|
55
|
+
if first && last
|
56
|
+
Range.new(first.to_time.to_i, last.to_time.to_i, exclude)
|
57
|
+
else
|
58
|
+
range
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def build_expression_part(key, value)
|
63
|
+
case
|
64
|
+
when value.class == String
|
65
|
+
"row.send(:#{key}) == '#{value}'"
|
66
|
+
when value.class == Regexp
|
67
|
+
"row.send(:#{key}).match(/#{value.source}/)"
|
68
|
+
when value.class == Array
|
69
|
+
"#{value}.include?(row.send(:#{key}))"
|
70
|
+
when value.class == Range
|
71
|
+
parsed_range_value = parse_range_values(value)
|
72
|
+
"(#{parsed_range_value}).cover?(row.send(:#{key}))"
|
73
|
+
when [TrueClass, FalseClass].include?(value.class)
|
74
|
+
"row.send(:#{key}) == '#{value.to_s}'"
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
35
78
|
def build_expression(conditional, attrs)
|
36
|
-
|
79
|
+
string = ''
|
80
|
+
is_first = true
|
81
|
+
|
82
|
+
attrs.each do |k, v|
|
83
|
+
string << " #{conditional} " unless is_first
|
84
|
+
string << build_expression_part(k, v)
|
85
|
+
|
86
|
+
is_first = false
|
87
|
+
end
|
88
|
+
string
|
37
89
|
end
|
38
90
|
end
|
39
91
|
end
|
data/lib/csv_orm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv_orm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Lerner
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-02-
|
11
|
+
date: 2019-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- Rakefile
|
113
113
|
- bin/console
|
114
114
|
- bin/setup
|
115
|
+
- csv_orm-0.1.0.gem
|
115
116
|
- csv_orm.gemspec
|
116
117
|
- lib/csv_orm.rb
|
117
118
|
- lib/csv_orm/ingestor.rb
|