davidrichards-etl 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 5
4
+ :patch: 6
data/lib/etl/csv_et.rb CHANGED
@@ -19,34 +19,8 @@ module CSV
19
19
 
20
20
  # Attempts to get a string from a file, a uri, or a string
21
21
  def extract
22
- obj = self.options.fetch(:source, nil)
23
- extract_locally(obj) or extract_remotely(obj) or extract_from_string(obj)
24
- raise ArgumentError, "Could not determine what #{obj.inspect} was. CSV::ET cannot work with this data." unless @raw
25
- end
26
-
27
- # Handles local filename cases, reading the contents of the file.
28
- def extract_locally(filename)
29
- @raw = File.read(filename) if File.exist?(filename)
30
- ET.logger.info "Extracted the data from from filesystem" if @raw
31
- @raw ? true : false
32
- end
33
-
34
- # Handles remote uri cases, reading the remote resource with open-uri, part of the Standard Library
35
- def extract_remotely(uri)
36
- begin
37
- open(uri) {|f| @raw = f.read}
38
- ET.logger.info "Extracted the data from a remote location."
39
- return true
40
- rescue
41
- ET.logger.info "Tested whether #{uri} was a remote resource. Failed to read it."
42
- return false
43
- end
44
- end
45
-
46
- # If this is a string, assumes that the contents of the string are CSV contents.
47
- def extract_from_string(string)
48
- @raw = string if string.is_a?(String)
49
- @raw ? true : false
22
+ source = self.options.fetch(:source, nil)
23
+ @raw = OpenContent::Extractor.process(source, ET.logger)
50
24
  end
51
25
 
52
26
  def transform
@@ -0,0 +1,19 @@
1
+ # Requires data_frame (sudo gem install davidrichards-data_frame)
2
+ # gem 'davidrichards-data_frame'
3
+ # require 'data_frame'
4
+ #
5
+ # # This is a simple tool that converts RDF to DataFrames. It uses the
6
+ # # subjects as the rows, the objects as the columns, and the predicates
7
+ # # as the values. This can make the data much more accessible by more
8
+ # # analysis tools.
9
+ # class RDF2DataFrame < ETL
10
+ #
11
+ # def extract
12
+ # source = self.options.fetch(:source, nil)
13
+ # @raw = OpenContent::Extractor.process(source, RDF2DataFrame.logger)
14
+ # end
15
+ #
16
+ # def transform
17
+ # # TODO
18
+ # end
19
+ # end
@@ -0,0 +1 @@
1
+ Dir.glob("#{File.dirname(__FILE__)}/open_content/*.rb").each { |file| require file }
@@ -0,0 +1,43 @@
require 'open-uri'

# Namespace for helpers that load raw content from arbitrary sources.
module OpenContent
  # Resolves a "source" to its raw contents. A source is tried, in order, as
  #   1. the path of an existing local file,
  #   2. a remote resource readable through open-uri,
  #   3. a literal String, which is returned unchanged.
  class Extractor
    class << self
      # The logger handed to the most recent call to +process+.
      attr_reader :logger

      # Returns the raw content that +source+ refers to.
      #
      # source - a local path, a remote URI (String or URI object), or a
      #          String holding the content itself.
      # logger - an object responding to +info+; may be nil to disable logging.
      #
      # Raises ArgumentError when none of the strategies can handle +source+.
      #
      # The result is kept in a local variable rather than on the class, so a
      # previous successful call can never leak its data into a later one.
      def process(source, logger)
        @logger = logger
        raw = extract_locally(source) || extract_remotely(source) || extract_from_string(source)
        unless raw
          raise ArgumentError, "Could not determine what #{source.inspect} was. Cannot extract this data."
        end
        raw
      end

      protected

      # Handles local filename cases, reading the contents of the file.
      # Returns the file contents, or nil when +filename+ is not path-like or
      # does not exist.
      def extract_locally(filename)
        # Guard first: File.exist?(nil) would raise TypeError and mask the
        # ArgumentError that process is meant to raise for unusable sources.
        path_like = filename.respond_to?(:to_str) || filename.respond_to?(:to_path)
        return nil unless path_like && File.exist?(filename)

        content = File.read(filename)
        log "Extracted the data from from filesystem"
        content
      end

      # Handles remote uri cases, reading the remote resource with open-uri,
      # part of the Standard Library. Returns the body, or nil on any failure.
      def extract_remotely(uri)
        # Go through the URI object's own #open (mixed in by open-uri) instead
        # of Kernel#open: Kernel#open executes "| command" strings and stopped
        # opening URLs in Ruby 3.0.
        target = uri
        target = URI.parse(uri) if !uri.respond_to?(:open) && uri.respond_to?(:to_str)

        content = target.respond_to?(:open) ? target.open { |io| io.read } : nil
        if content
          log "Extracted the data from a remote location."
        else
          log "Tested whether #{uri} was a remote resource. Failed to read it."
        end
        content
      rescue StandardError
        # Deliberate best-effort probe: an unparsable or unreachable URI just
        # means "not a remote resource", so fall through to the next strategy.
        log "Tested whether #{uri} was a remote resource. Failed to read it."
        nil
      end

      # If this is a String, assumes the string itself is the content.
      # Returns the string, or nil for any other kind of object.
      def extract_from_string(string)
        string.is_a?(String) ? string : nil
      end

      # Sends +message+ to the current logger at info level, if there is one.
      def log(message)
        logger.info(message) if logger
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-03 00:00:00 -07:00
12
+ date: 2009-08-09 00:00:00 -07:00
13
13
  default_executable: etl
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -50,12 +50,16 @@ files:
50
50
  - lib/etl/bucket.rb
51
51
  - lib/etl/csv_et.rb
52
52
  - lib/etl/etl.rb
53
+ - lib/etl/rdf_2_data_frame.rb
53
54
  - lib/etl/time_bucket.rb
54
55
  - lib/etl/xml_et.rb
55
56
  - lib/etl.rb
56
57
  - lib/helpers
57
58
  - lib/helpers/array.rb
58
59
  - lib/helpers/observation.rb
60
+ - lib/helpers/open_content
61
+ - lib/helpers/open_content/extractor.rb
62
+ - lib/helpers/open_content.rb
59
63
  - lib/helpers/open_struct.rb
60
64
  - lib/helpers/string.rb
61
65
  - lib/helpers/symbol.rb