csv_orm 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -9
- data/csv_orm-0.1.0.gem +0 -0
- data/lib/csv_orm/ingestor.rb +14 -2
- data/lib/csv_orm/query.rb +54 -2
- data/lib/csv_orm/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40f766dd6da056cc5ab425d390c33a86783b1c1f
|
4
|
+
data.tar.gz: da13ec654fac0cfddb2375f274f6f0f11f099ed4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aba4290e99c36ebd87b7cf0748cc25deb18c278e33346c6e71717bf6496501ca286f6ee2b733efbb05ae6e1c837320af21b92d96a806a6ed3fc7286933005aa0
|
7
|
+
data.tar.gz: 10d975ca6d8985ee36e044415ad1a95604ea6093cfbd973536beba0d9e98d10cabed1fa3b623aad709a9e6e632ec582d807996637b46bda686d5862fd14aa4ac
|
data/README.md
CHANGED
@@ -28,25 +28,42 @@ Or install it yourself as:
|
|
28
28
|
Now you can run ActiveRecord-like queries on the dataset. Currently it supports 3 methods:
|
29
29
|
|
30
30
|
```ruby
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#aggregate(:field1, :field2)
|
31
|
+
#where({key: 'value', other_key: 'other_value'}) (all conditions must be met)
|
32
|
+
#where_any({key: 'value', other_key: 'other_value'}) (like an 'or' condition)
|
33
|
+
#aggregate(:field1, :field2) (count unique values for each field)
|
34
34
|
```
|
35
35
|
|
36
36
|
```ruby
|
37
|
-
# show users who have have any admin access
|
38
|
-
my_data.where_any({admin: true, super_admin: true})
|
39
|
-
|
40
37
|
# show users who are admin and named 'Mike'
|
41
|
-
my_data.
|
38
|
+
my_data.where({admin: true, first_name: 'Mike'})
|
39
|
+
|
40
|
+
# show users who have admin access or have last name N-Z
|
41
|
+
my_data.where_any({admin: true, last_name: /^[n-zN-Z].*/})
|
42
42
|
|
43
43
|
# give me a breakdown of orders by their delivery status for users named 'Mike'
|
44
|
-
my_data.
|
45
|
-
#=> {
|
44
|
+
my_data.where({first_name: 'Mike'}).aggregate(:delivery_status, :admin)
|
45
|
+
#=> {
|
46
|
+
delivery_status: {placed: 10, processing: 22, shipped: 43, delivered: 25},
|
47
|
+
admin: { true: 45, false: 55 }
|
48
|
+
}
|
46
49
|
```
|
47
50
|
|
48
51
|
Maybe more will come...
|
49
52
|
|
53
|
+
Ok a little more...
|
54
|
+
|
55
|
+
Now supporting an option: `{smart: false}`. By default, time-like fields are converted to integer timestamps internally on ingestion, so a string like `'2018-01-01'` becomes `1514764800`. If you pass `smart: false`, this conversion is skipped and the fields are kept as strings.
|
56
|
+
|
57
|
+
Also supporting multiple data types as queryable criteria. You can now do...
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
data.where({string_field: 'foo'}) # exact match
|
61
|
+
data.where({string_field: /\d+/}) # pattern match
|
62
|
+
data.where({array_field: ['a', 'b']}) # is one of
|
63
|
+
data.where({range_field: (start_day..end_day)}) # is within range, supports numbers/dates
|
64
|
+
```
|
65
|
+
|
66
|
+
|
50
67
|
## Development
|
51
68
|
|
52
69
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/csv_orm-0.1.0.gem
ADDED
Binary file
|
data/lib/csv_orm/ingestor.rb
CHANGED
@@ -2,12 +2,13 @@ module CsvOrm
|
|
2
2
|
class Ingestor
|
3
3
|
attr_accessor :file, :path, :headers, :headers_defined, :data_set
|
4
4
|
|
5
|
-
def initialize(file_path)
|
5
|
+
def initialize(file_path, options={})
|
6
6
|
@path = File.expand_path(file_path)
|
7
7
|
@file = File.open(path)
|
8
8
|
@headers = [] # will define in first iteration of loop
|
9
9
|
@headers_defined = false
|
10
10
|
@data_set = []
|
11
|
+
@smart = options[:smart] == false ? false : true
|
11
12
|
end
|
12
13
|
|
13
14
|
def parse
|
@@ -15,10 +16,21 @@ module CsvOrm
|
|
15
16
|
unless @headers_defined
|
16
17
|
@headers = row.map {|header| header.gsub(' ', '_').downcase.to_sym }
|
17
18
|
end
|
18
|
-
|
19
|
+
parsed_row = row.map {|field| infer_data_type(field) }
|
20
|
+
@data_set << OpenStruct.new(Hash[headers.zip(parsed_row)]) if @headers_defined
|
19
21
|
@headers_defined = true
|
20
22
|
end
|
21
23
|
@data_set
|
22
24
|
end
|
25
|
+
|
26
|
+
def infer_data_type(field)
|
27
|
+
# currently supporting time to integer conversion
|
28
|
+
return field.to_s unless @smart
|
29
|
+
date = DateTime.parse(field) rescue nil
|
30
|
+
if date
|
31
|
+
return date.to_time.to_i
|
32
|
+
end
|
33
|
+
field.to_s
|
34
|
+
end
|
23
35
|
end
|
24
36
|
end
|
data/lib/csv_orm/query.rb
CHANGED
@@ -22,18 +22,70 @@ module CsvOrm
|
|
22
22
|
agg
|
23
23
|
end
|
24
24
|
|
25
|
+
def explain(method, args)
|
26
|
+
case
|
27
|
+
when method == :where
|
28
|
+
build_expression('&&', args)
|
29
|
+
when method == :where_any
|
30
|
+
build_expression('||', args)
|
31
|
+
else
|
32
|
+
'not supported at this time'
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
25
36
|
def where_any(attrs)
|
26
37
|
expression = build_expression('||', attrs)
|
27
38
|
self.class.new(@data.select {|row| eval(expression)});
|
28
39
|
end
|
29
40
|
|
30
|
-
def
|
41
|
+
def where(attrs)
|
31
42
|
expression = build_expression('&&', attrs)
|
32
43
|
self.class.new(@data.select {|row| eval(expression)});
|
33
44
|
end
|
34
45
|
|
46
|
+
def not(attrs)
|
47
|
+
expression = build_expression('&&', attrs)
|
48
|
+
self.class.new(@data.reject {|row| eval(expression)});
|
49
|
+
end
|
50
|
+
|
51
|
+
def parse_range_values(range)
|
52
|
+
exclude = range.exclude_end?
|
53
|
+
first, last = DateTime.parse(range.begin), DateTime.parse(range.end) rescue nil
|
54
|
+
|
55
|
+
if first && last
|
56
|
+
Range.new(first.to_time.to_i, last.to_time.to_i, exclude)
|
57
|
+
else
|
58
|
+
range
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def build_expression_part(key, value)
|
63
|
+
case
|
64
|
+
when value.class == String
|
65
|
+
"row.send(:#{key}) == '#{value}'"
|
66
|
+
when value.class == Regexp
|
67
|
+
"row.send(:#{key}).match(/#{value.source}/)"
|
68
|
+
when value.class == Array
|
69
|
+
"#{value}.include?(row.send(:#{key}))"
|
70
|
+
when value.class == Range
|
71
|
+
parsed_range_value = parse_range_values(value)
|
72
|
+
"(#{parsed_range_value}).cover?(row.send(:#{key}))"
|
73
|
+
when [TrueClass, FalseClass].include?(value.class)
|
74
|
+
"row.send(:#{key}) == '#{value.to_s}'"
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
35
78
|
def build_expression(conditional, attrs)
|
36
|
-
|
79
|
+
string = ''
|
80
|
+
is_first = true
|
81
|
+
|
82
|
+
attrs.each do |k, v|
|
83
|
+
string << " #{conditional} " unless is_first
|
84
|
+
string << build_expression_part(k, v)
|
85
|
+
|
86
|
+
is_first = false
|
87
|
+
end
|
88
|
+
string
|
37
89
|
end
|
38
90
|
end
|
39
91
|
end
|
data/lib/csv_orm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv_orm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Lerner
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-02-
|
11
|
+
date: 2019-02-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- Rakefile
|
113
113
|
- bin/console
|
114
114
|
- bin/setup
|
115
|
+
- csv_orm-0.1.0.gem
|
115
116
|
- csv_orm.gemspec
|
116
117
|
- lib/csv_orm.rb
|
117
118
|
- lib/csv_orm/ingestor.rb
|