davidrichards-data_frame 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 17
4
+ :patch: 18
@@ -20,5 +20,6 @@ $:.unshift(File.dirname(__FILE__))
20
20
  require 'data_frame/callback_array'
21
21
  require 'data_frame/transposable_array'
22
22
  require 'data_frame/parameter_capture'
23
+ require 'data_frame/arff'
23
24
  require 'data_frame/data_frame'
24
25
  require 'data_frame/model'
@@ -0,0 +1,45 @@
1
+ # Turns a data frame into ARFF-formatted content.
2
+ module ARFF
3
+
4
+ # Used in arff, but generally useful.
5
+ def to_csv(include_header=true)
6
+ value = include_header ? self.labels.map{|e| e.to_s}.join(',') + "\n" : ''
7
+ self.items.inject(value) do |list, e|
8
+ list << e.map {|cell| cell.to_s}.join(',') + "\n"
9
+ end
10
+ end
11
+
12
+ def to_arff
13
+ arff_header + to_csv(false)
14
+ end
15
+
16
+ protected
17
+ def arff_attributes
18
+ container = defined?(Dictionary) ? Dictionary.new : Hash.new
19
+
20
+ self.labels.inject(container) do |list, e|
21
+ list[e] = self.render_column(e).categories
22
+ end
23
+ end
24
+
25
+ def arff_formatted_attributes
26
+ self.labels.inject('') do |str, e|
27
+ val = "{" + self.render_column(e).categories.map{|x| x.to_s}.join(',') + "}"
28
+ str << "@attribute #{e} #{val}\n"
29
+ end
30
+ end
31
+
32
+ def arff_relation
33
+ self.name ? self.name.to_underscore_sym.to_s : 'unamed_relation'
34
+ end
35
+
36
+ def arff_header
37
+ %[@relation #{arff_relation}
38
+
39
+ #{arff_formatted_attributes}
40
+ @data
41
+ ]
42
+ end
43
+
44
+ alias :arff_items :to_csv
45
+ end
@@ -18,13 +18,25 @@ class DataFrame
18
18
  # This returns bar where 'foo' was found and 'foo' everywhere else.
19
19
  def from_csv(obj, opts={})
20
20
  labels, table = infer_csv_contents(obj, opts)
21
+ name = infer_name_from_contents(obj, opts)
21
22
  return nil unless labels and table
22
23
  df = new(*labels)
23
24
  df.import(table)
25
+ df.name = name
24
26
  df
25
27
  end
26
28
 
27
29
  protected
30
+
31
+ # Only works for named sources (URLs and files)
32
+ def infer_name_from_contents(obj, opts={})
33
+ begin
34
+ File.split(obj).last.split('.')[0..-2].join('.').titleize
35
+ rescue
36
+ nil
37
+ end
38
+ end
39
+
28
40
  def infer_csv_contents(obj, opts={})
29
41
  contents = File.read(obj) if File.exist?(obj)
30
42
  begin
@@ -45,6 +57,9 @@ class DataFrame
45
57
  def default_csv_opts; {:converters => :all}; end
46
58
  end
47
59
 
60
+ # Include the methods from arff.rb
61
+ include ARFF
62
+
48
63
  # Loads a batch of rows. Expects an array of arrays, else you don't
49
64
  # know what you have.
50
65
  def import(rows)
@@ -64,6 +79,9 @@ class DataFrame
64
79
  # The items stored in the frame
65
80
  attr_reader :items
66
81
 
82
+ # An optional name, useful for arff files
83
+ attr_accessor :name
84
+
67
85
  def initialize(*labels)
68
86
  @labels = labels.map {|e| e.to_underscore_sym }
69
87
  @items = TransposableArray.new
@@ -0,0 +1,47 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe "ARFF" do
4
+ before do
5
+ @df = DataFrame.from_csv(File.expand_path(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.csv')))
6
+ end
7
+
8
+ it "should allow a data frame to be expressed as an arff-formatted file" do
9
+ @df.to_arff.should eql(basic_arff)
10
+ end
11
+
12
+ it "should add a to_csv method" do
13
+ @df.to_csv.should eql(%{x,y,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain,area
14
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
15
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
16
+ })
17
+ end
18
+
19
+ it "should allow a non-header export for to_csv" do
20
+ @df.to_csv(false).should eql(%{7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
21
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
22
+ })
23
+ end
24
+ end
25
+
26
+ def basic_arff
27
+ %[@relation basic
28
+
29
+ @attribute x {7}
30
+ @attribute y {4,5}
31
+ @attribute month {mar,oct}
32
+ @attribute day {fri,tue}
33
+ @attribute ffmc {86.2,90.6}
34
+ @attribute dmc {26.2,35.4}
35
+ @attribute dc {94.3,669.1}
36
+ @attribute isi {5.1,6.7}
37
+ @attribute temp {8.2,18}
38
+ @attribute rh {33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51}
39
+ @attribute wind {0.9,6.7}
40
+ @attribute rain {0}
41
+ @attribute area {0}
42
+
43
+ @data
44
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
45
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
46
+ ]
47
+ end
@@ -12,6 +12,10 @@ describe DataFrame do
12
12
  df.labels.should eql(@labels)
13
13
  end
14
14
 
15
+ it "should have an optional name" do
16
+ @df.name = :some_name
17
+ @df.name.should eql(:some_name)
18
+ end
15
19
  it "should initialize with an empty items list" do
16
20
  @df.items.should be_is_a(TransposableArray)
17
21
  @df.items.should be_empty
@@ -91,6 +95,18 @@ describe DataFrame do
91
95
  @df.x.should eql([7,7])
92
96
  @df.area.should eql([0,0])
93
97
  end
98
+
99
+ it "should infer a name when importing from a file" do
100
+ filename = "/tmp/data_frame_spec.csv"
101
+ contents = %{X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
102
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
103
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
104
+ }
105
+ File.open(filename, 'w') {|f| f.write contents}
106
+ @df = DataFrame.from_csv(filename)
107
+ @df.name.should eql('Data Frame Spec')
108
+ `rm -rf #{filename}`
109
+ end
94
110
  end
95
111
 
96
112
  it "should be able to remove a column" do
@@ -0,0 +1,3 @@
1
+ X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
2
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
3
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-29 00:00:00 -07:00
12
+ date: 2009-09-13 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -54,6 +54,7 @@ files:
54
54
  - README.rdoc
55
55
  - VERSION.yml
56
56
  - lib/data_frame
57
+ - lib/data_frame/arff.rb
57
58
  - lib/data_frame/callback_array.rb
58
59
  - lib/data_frame/data_frame.rb
59
60
  - lib/data_frame/model.rb
@@ -65,12 +66,15 @@ files:
65
66
  - lib/ext/string.rb
66
67
  - lib/ext/symbol.rb
67
68
  - spec/data_frame
69
+ - spec/data_frame/arff_spec.rb
68
70
  - spec/data_frame/callback_array_spec.rb
69
71
  - spec/data_frame/data_frame_spec.rb
70
72
  - spec/data_frame/model_spec.rb
71
73
  - spec/data_frame/parameter_capture_spec.rb
72
74
  - spec/data_frame/transposable_array_spec.rb
73
75
  - spec/data_frame_spec.rb
76
+ - spec/fixtures
77
+ - spec/fixtures/basic.csv
74
78
  - spec/spec_helper.rb
75
79
  has_rdoc: true
76
80
  homepage: http://github.com/davidrichards/data_frame