davidrichards-data_frame 0.0.17 → 0.0.18

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 0
4
- :patch: 17
4
+ :patch: 18
@@ -20,5 +20,6 @@ $:.unshift(File.dirname(__FILE__))
20
20
  require 'data_frame/callback_array'
21
21
  require 'data_frame/transposable_array'
22
22
  require 'data_frame/parameter_capture'
23
+ require 'data_frame/arff'
23
24
  require 'data_frame/data_frame'
24
25
  require 'data_frame/model'
@@ -0,0 +1,45 @@
1
# Turns a data frame into ARFF-formatted content.
#
# Intended to be mixed into a class that provides +labels+, +items+,
# +name+ and +render_column(label)+; the object returned by
# +render_column+ must respond to +categories+.
module ARFF

  # Renders the rows as comma-separated text, one newline-terminated line
  # per row. Used by to_arff, but generally useful.
  #
  # include_header - when true (the default) the first line lists the
  #                  labels; when false only the data rows are emitted.
  #
  # Cells are written with to_s and joined verbatim: values that contain
  # commas or newlines are NOT quoted or escaped.
  def to_csv(include_header=true)
    header = include_header ? [labels.map { |label| label.to_s }.join(',')] : []
    lines = items.inject(header) do |acc, row|
      acc << row.map { |cell| cell.to_s }.join(',')
    end
    # Build a fresh string instead of appending to a string literal.
    lines.map { |line| line + "\n" }.join
  end

  # Returns the full ARFF document: the header section followed by the
  # data rows (CSV without the label line).
  def to_arff
    arff_header + to_csv(false)
  end

  protected

    # Maps every label to the categories of its rendered column.
    #
    # Uses Dictionary when that constant is defined (presumably an
    # insertion-ordered hash from an optional library -- confirm), and a
    # plain Hash otherwise.
    def arff_attributes
      container = defined?(Dictionary) ? Dictionary.new : Hash.new

      labels.inject(container) do |attributes, label|
        attributes[label] = render_column(label).categories
        # inject feeds the block's value into the next step, so hand back
        # the container itself rather than the value just assigned.
        attributes
      end
    end

    # One "@attribute <label> {v1,v2,...}" line per label, each terminated
    # by a newline. Returns an empty string when there are no labels.
    def arff_formatted_attributes
      labels.map do |label|
        values = render_column(label).categories.map { |category| category.to_s }.join(',')
        "@attribute #{label} {#{values}}\n"
      end.join
    end

    # The relation name: the frame's name converted with to_underscore_sym,
    # or a fixed placeholder when no name has been set.
    def arff_relation
      name ? name.to_underscore_sym.to_s : 'unamed_relation'
    end

    # The "@relation" line, a blank line, the attribute declarations, a
    # blank line and the "@data" marker. Written with explicit newlines so
    # the layout does not depend on source-file indentation.
    def arff_header
      "@relation #{arff_relation}\n\n#{arff_formatted_attributes}\n@data\n"
    end

    # NOTE: the alias is bound to this module's to_csv, so arff_items
    # includes the header line by default, exactly like to_csv.
    alias :arff_items :to_csv
end
@@ -18,13 +18,25 @@ class DataFrame
18
18
  # This returns bar where 'foo' was found and 'foo' everywhere else.
19
19
  def from_csv(obj, opts={})
20
20
  labels, table = infer_csv_contents(obj, opts)
21
+ name = infer_name_from_contents(obj, opts)
21
22
  return nil unless labels and table
22
23
  df = new(*labels)
23
24
  df.import(table)
25
+ df.name = name
24
26
  df
25
27
  end
26
28
 
27
29
  protected
30
+
31
# Infers a display name from a named source (a file path or URL): the last
# path segment with its final extension removed, then titleized.
#
# Only works for named sources; anything that cannot be treated as a path
# (for example a non-string object) yields nil rather than an error.
#
# obj  - the source handed to from_csv.
# opts - accepted for signature parity with the other inference helpers;
#        not used here.
#
# Returns the titleized name, or nil when none can be derived.
def infer_name_from_contents(obj, opts={})
  # File.basename keeps the whole segment when there is no extension,
  # whereas dropping the last dot-separated piece would leave an empty name.
  File.basename(obj, '.*').titleize
rescue StandardError
  # Deliberately best-effort: a missing name must never break an import.
  nil
end
39
+
28
40
  def infer_csv_contents(obj, opts={})
29
41
  contents = File.read(obj) if File.exist?(obj)
30
42
  begin
@@ -45,6 +57,9 @@ class DataFrame
45
57
  def default_csv_opts; {:converters => :all}; end
46
58
  end
47
59
 
60
+ # Include the methods from arff.rb
61
+ include ARFF
62
+
48
63
  # Loads a batch of rows. Expects an array of arrays, else you don't
49
64
  # know what you have.
50
65
  def import(rows)
@@ -64,6 +79,9 @@ class DataFrame
64
79
  # The items stored in the frame
65
80
  attr_reader :items
66
81
 
82
+ # An optional name, useful for arff files
83
+ attr_accessor :name
84
+
67
85
  def initialize(*labels)
68
86
  @labels = labels.map {|e| e.to_underscore_sym }
69
87
  @items = TransposableArray.new
@@ -0,0 +1,47 @@
1
+ require File.join(File.dirname(__FILE__), "/../spec_helper")
2
+
3
+ describe "ARFF" do
4
+ before do
5
+ @df = DataFrame.from_csv(File.expand_path(File.join(File.dirname(__FILE__), '..', 'fixtures', 'basic.csv')))
6
+ end
7
+
8
+ it "should allow a data frame to be expressed as an arff-formatted file" do
9
+ @df.to_arff.should eql(basic_arff)
10
+ end
11
+
12
+ it "should add a to_csv method" do
13
+ @df.to_csv.should eql(%{x,y,month,day,ffmc,dmc,dc,isi,temp,rh,wind,rain,area
14
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
15
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
16
+ })
17
+ end
18
+
19
+ it "should allow a non-header export for to_csv" do
20
+ @df.to_csv(false).should eql(%{7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
21
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
22
+ })
23
+ end
24
+ end
25
+
26
+ def basic_arff
27
+ %[@relation basic
28
+
29
+ @attribute x {7}
30
+ @attribute y {4,5}
31
+ @attribute month {mar,oct}
32
+ @attribute day {fri,tue}
33
+ @attribute ffmc {86.2,90.6}
34
+ @attribute dmc {26.2,35.4}
35
+ @attribute dc {94.3,669.1}
36
+ @attribute isi {5.1,6.7}
37
+ @attribute temp {8.2,18}
38
+ @attribute rh {33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51}
39
+ @attribute wind {0.9,6.7}
40
+ @attribute rain {0}
41
+ @attribute area {0}
42
+
43
+ @data
44
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
45
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
46
+ ]
47
+ end
@@ -12,6 +12,10 @@ describe DataFrame do
12
12
  df.labels.should eql(@labels)
13
13
  end
14
14
 
15
+ it "should have an optional name" do
16
+ @df.name = :some_name
17
+ @df.name.should eql(:some_name)
18
+ end
15
19
  it "should initialize with an empty items list" do
16
20
  @df.items.should be_is_a(TransposableArray)
17
21
  @df.items.should be_empty
@@ -91,6 +95,18 @@ describe DataFrame do
91
95
  @df.x.should eql([7,7])
92
96
  @df.area.should eql([0,0])
93
97
  end
98
+
99
+ it "should infer a name when importing from a file" do
100
+ filename = "/tmp/data_frame_spec.csv"
101
+ contents = %{X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
102
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
103
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
104
+ }
105
+ File.open(filename, 'w') {|f| f.write contents}
106
+ @df = DataFrame.from_csv(filename)
107
+ @df.name.should eql('Data Frame Spec')
108
+ `rm -rf #{filename}`
109
+ end
94
110
  end
95
111
 
96
112
  it "should be able to remove a column" do
@@ -0,0 +1,3 @@
1
+ X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
2
+ 7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
3
+ 7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: davidrichards-data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Richards
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-29 00:00:00 -07:00
12
+ date: 2009-09-13 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -54,6 +54,7 @@ files:
54
54
  - README.rdoc
55
55
  - VERSION.yml
56
56
  - lib/data_frame
57
+ - lib/data_frame/arff.rb
57
58
  - lib/data_frame/callback_array.rb
58
59
  - lib/data_frame/data_frame.rb
59
60
  - lib/data_frame/model.rb
@@ -65,12 +66,15 @@ files:
65
66
  - lib/ext/string.rb
66
67
  - lib/ext/symbol.rb
67
68
  - spec/data_frame
69
+ - spec/data_frame/arff_spec.rb
68
70
  - spec/data_frame/callback_array_spec.rb
69
71
  - spec/data_frame/data_frame_spec.rb
70
72
  - spec/data_frame/model_spec.rb
71
73
  - spec/data_frame/parameter_capture_spec.rb
72
74
  - spec/data_frame/transposable_array_spec.rb
73
75
  - spec/data_frame_spec.rb
76
+ - spec/fixtures
77
+ - spec/fixtures/basic.csv
74
78
  - spec/spec_helper.rb
75
79
  has_rdoc: true
76
80
  homepage: http://github.com/davidrichards/data_frame