davidrichards-etl 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/lib/etl/csv_et.rb +2 -28
- data/lib/etl/rdf_2_data_frame.rb +19 -0
- data/lib/helpers/open_content.rb +1 -0
- data/lib/helpers/open_content/extractor.rb +43 -0
- metadata +6 -2
data/VERSION.yml
CHANGED
data/lib/etl/csv_et.rb
CHANGED
@@ -19,34 +19,8 @@ module CSV
|
|
19
19
|
|
20
20
|
# Attempts to get a string from a file, a uri, or a string
|
21
21
|
def extract
|
22
|
-
|
23
|
-
|
24
|
-
raise ArgumentError, "Could not determine what #{obj.inspect} was. CSV::ET cannot work with this data." unless @raw
|
25
|
-
end
|
26
|
-
|
27
|
-
# Handles local filename cases, reading the contents of the file.
|
28
|
-
def extract_locally(filename)
|
29
|
-
@raw = File.read(filename) if File.exist?(filename)
|
30
|
-
ET.logger.info "Extracted the data from from filesystem" if @raw
|
31
|
-
@raw ? true : false
|
32
|
-
end
|
33
|
-
|
34
|
-
# Handles remote uri cases, reading the remote resource with open-uri, part of the Standard Library
|
35
|
-
def extract_remotely(uri)
|
36
|
-
begin
|
37
|
-
open(uri) {|f| @raw = f.read}
|
38
|
-
ET.logger.info "Extracted the data from a remote location."
|
39
|
-
return true
|
40
|
-
rescue
|
41
|
-
ET.logger.info "Tested whether #{uri} was a remote resource. Failed to read it."
|
42
|
-
return false
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
# If this is a string, assumes that the contents of the string are CSV contents.
|
47
|
-
def extract_from_string(string)
|
48
|
-
@raw = string if string.is_a?(String)
|
49
|
-
@raw ? true : false
|
22
|
+
source = self.options.fetch(:source, nil)
|
23
|
+
@raw = OpenContent::Extractor.process(source, ET.logger)
|
50
24
|
end
|
51
25
|
|
52
26
|
def transform
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Requires data_frame (sudo gem install davidrichards-data_frame)
|
2
|
+
# gem 'davidrichards-data_frame'
|
3
|
+
# require 'data_frame'
|
4
|
+
#
|
5
|
+
# # This is a simple tool that converts RDF to DataFrames. It uses the
|
6
|
+
# # subjects as the rows, the objects as the columns, and the predicates
|
7
|
+
# # as the values. This can make the data much more accessible by more
|
8
|
+
# # analysis tools.
|
9
|
+
# class RDF2DataFrame < ETL
|
10
|
+
#
|
11
|
+
# def extract
|
12
|
+
# source = self.options.fetch(:source, nil)
|
13
|
+
# @raw = OpenContent::Extractor.process(source, RDF2DataFrame.logger)
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# def transform
|
17
|
+
# # TODO
|
18
|
+
# end
|
19
|
+
# end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Load every helper that lives in the sibling open_content/ directory.
# The list is sorted because Dir.glob does not guarantee an ordering on
# every Ruby version/platform, and a stable load order keeps behavior
# reproducible if one helper ever depends on another.
Dir.glob(File.join(File.dirname(__FILE__), 'open_content', '*.rb')).sort.each { |file| require file }
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'open-uri'

module OpenContent
  # Turns a "source" into its raw text. A source may be a path to a local
  # file, an http/https/ftp location, or a String that already holds the
  # content. The strategies are tried in that order and the first one that
  # succeeds wins.
  class Extractor
    # Only locations with one of these schemes are handed to open-uri.
    # Anything else (notably strings starting with "|", which Kernel#open
    # would run as a shell command) is never treated as a remote resource.
    REMOTE_SOURCE = %r{\A(?:https?|ftp)://}i

    class << self
      # The logger given to the most recent call to +process+.
      attr_reader :logger

      # Extracts the content described by +source+.
      #
      # source - a local filename, a remote location (String or URI), or a
      #          String containing the data itself.
      # logger - an object responding to +info+; may be nil to disable logging.
      #
      # Returns the extracted content as a String.
      # Raises ArgumentError when no strategy can make sense of +source+.
      def process(source, logger)
        @logger = logger
        # Reset state left over from a previous call. Without this, a stale
        # value makes every later call look successful and return old data.
        @raw = nil
        extract_locally(source) || extract_remotely(source) || extract_from_string(source)
        raise ArgumentError, "Could not determine what #{source.inspect} was. Cannot extract this data." unless @raw

        @raw
      end

      protected

      # Handles local filename cases, reading the contents of the file.
      # Returns true only when a file was actually read by this call.
      def extract_locally(filename)
        return false unless filename.is_a?(String) && local_file?(filename)

        @raw = File.read(filename)
        log_info 'Extracted the data from the filesystem'
        true
      end

      # Handles remote uri cases, reading the remote resource with open-uri,
      # part of the Standard Library. Accepts a String or a URI object.
      # Returns true when the resource was read, false otherwise.
      def extract_remotely(uri)
        location = uri.is_a?(URI::Generic) ? uri.to_s : uri
        return false unless location.is_a?(String) && location.match?(REMOTE_SOURCE)

        begin
          # URI.open (not Kernel#open) is the supported open-uri entry point.
          @raw = URI.open(location, &:read)
          log_info 'Extracted the data from a remote location.'
          true
        rescue StandardError
          @raw = nil
          log_info "Tested whether #{uri} was a remote resource. Failed to read it."
          false
        end
      end

      # If this is a string, assumes that the string itself is the content.
      # Returns true when +string+ was accepted, false otherwise.
      def extract_from_string(string)
        return false unless string.is_a?(String)

        @raw = string
        true
      end

      private

      # True when +path+ names a readable regular file. Strings that cannot
      # be paths at all (e.g. containing NUL bytes) simply count as "no".
      def local_file?(path)
        File.file?(path)
      rescue ArgumentError
        false
      end

      # Writes an info message when a logger was supplied.
      def log_info(message)
        logger.info(message) if logger
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: davidrichards-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Richards
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-08-
|
12
|
+
date: 2009-08-09 00:00:00 -07:00
|
13
13
|
default_executable: etl
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -50,12 +50,16 @@ files:
|
|
50
50
|
- lib/etl/bucket.rb
|
51
51
|
- lib/etl/csv_et.rb
|
52
52
|
- lib/etl/etl.rb
|
53
|
+
- lib/etl/rdf_2_data_frame.rb
|
53
54
|
- lib/etl/time_bucket.rb
|
54
55
|
- lib/etl/xml_et.rb
|
55
56
|
- lib/etl.rb
|
56
57
|
- lib/helpers
|
57
58
|
- lib/helpers/array.rb
|
58
59
|
- lib/helpers/observation.rb
|
60
|
+
- lib/helpers/open_content
|
61
|
+
- lib/helpers/open_content/extractor.rb
|
62
|
+
- lib/helpers/open_content.rb
|
59
63
|
- lib/helpers/open_struct.rb
|
60
64
|
- lib/helpers/string.rb
|
61
65
|
- lib/helpers/symbol.rb
|