davidrichards-etl 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 5
4
+ :patch: 6
data/lib/etl/csv_et.rb CHANGED
@@ -19,34 +19,8 @@ module CSV
19
19
 
20
20
  # Attempts to get a string from a file, a uri, or a string
21
21
  def extract
22
- obj = self.options.fetch(:source, nil)
23
- extract_locally(obj) or extract_remotely(obj) or extract_from_string(obj)
24
- raise ArgumentError, "Could not determine what #{obj.inspect} was. CSV::ET cannot work with this data." unless @raw
25
- end
26
-
27
- # Handles local filename cases, reading the contents of the file.
28
- def extract_locally(filename)
29
- @raw = File.read(filename) if File.exist?(filename)
30
- ET.logger.info "Extracted the data from from filesystem" if @raw
31
- @raw ? true : false
32
- end
33
-
34
- # Handles remote uri cases, reading the remote resource with open-uri, part of the Standard Library
35
- def extract_remotely(uri)
36
- begin
37
- open(uri) {|f| @raw = f.read}
38
- ET.logger.info "Extracted the data from a remote location."
39
- return true
40
- rescue
41
- ET.logger.info "Tested whether #{uri} was a remote resource. Failed to read it."
42
- return false
43
- end
44
- end
45
-
46
- # If this is a string, assumes that the contents of the string are CSV contents.
47
- def extract_from_string(string)
48
- @raw = string if string.is_a?(String)
49
- @raw ? true : false
22
+ source = self.options.fetch(:source, nil)
23
+ @raw = OpenContent::Extractor.process(source, ET.logger)
50
24
  end
51
25
 
52
26
  def transform
@@ -0,0 +1,19 @@
1
+ # Requires data_frame (sudo gem install davidrichards-data_frame)
2
+ # gem 'davidrichards-data_frame'
3
+ # require 'data_frame'
4
+ #
5
+ # # This is a simple tool that converts RDF to DataFrames. It uses the
6
+ # # subjects as the rows, the objects as the columns, and the predicates
7
+ # # as the values. This can make the data much more accessible by more
8
+ # # analysis tools.
9
+ # class RDF2DataFrame < ETL
10
+ #
11
+ # def extract
12
+ # source = self.options.fetch(:source, nil)
13
+ # @raw = OpenContent::Extractor.process(source, RDF2DataFrame.logger)
14
+ # end
15
+ #
16
+ # def transform
17
+ # # TODO
18
+ # end
19
+ # end
@@ -0,0 +1 @@
1
+ Dir.glob("#{File.dirname(__FILE__)}/open_content/*.rb").each { |file| require file }
@@ -0,0 +1,43 @@
1
+ require 'open-uri'
2
+ module OpenContent
3
+ class Extractor
4
+ class << self
5
+
6
+ attr_reader :logger
7
+
8
+ def process(source, logger)
9
+ @logger = logger
10
+ extract_locally(source) or extract_remotely(source) or extract_from_string(source)
11
+ raise ArgumentError, "Could not determine what #{source.inspect} was. Cannot extract this data." unless @raw
12
+ @raw
13
+ end
14
+
15
+ protected
16
+ # Handles local filename cases, reading the contents of the file.
17
+ def extract_locally(filename)
18
+ @raw = File.read(filename) if File.exist?(filename)
19
+ self.logger.info "Extracted the data from from filesystem" if @raw
20
+ @raw ? true : false
21
+ end
22
+
23
+ # Handles remote uri cases, reading the remote resource with open-uri, part of the Standard Library
24
+ def extract_remotely(uri)
25
+ begin
26
+ open(uri) {|f| @raw = f.read}
27
+ self.logger.info "Extracted the data from a remote location."
28
+ return true
29
+ rescue
30
+ self.logger.info "Tested whether #{uri} was a remote resource. Failed to read it."
31
+ return false
32
+ end
33
+ end
34
+
35
+ # If this is a string, assumes that the string itself is the content to extract.
36
+ def extract_from_string(string)
37
+ @raw = string if string.is_a?(String)
38
+ @raw ? true : false
39
+ end
40
+
41
+ end
42
+ end
43
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-03 00:00:00 -07:00
12
+ date: 2009-08-09 00:00:00 -07:00
13
13
  default_executable: etl
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -50,12 +50,16 @@ files:
50
50
  - lib/etl/bucket.rb
51
51
  - lib/etl/csv_et.rb
52
52
  - lib/etl/etl.rb
53
+ - lib/etl/rdf_2_data_frame.rb
53
54
  - lib/etl/time_bucket.rb
54
55
  - lib/etl/xml_et.rb
55
56
  - lib/etl.rb
56
57
  - lib/helpers
57
58
  - lib/helpers/array.rb
58
59
  - lib/helpers/observation.rb
60
+ - lib/helpers/open_content
61
+ - lib/helpers/open_content/extractor.rb
62
+ - lib/helpers/open_content.rb
59
63
  - lib/helpers/open_struct.rb
60
64
  - lib/helpers/string.rb
61
65
  - lib/helpers/symbol.rb