google_refine_api 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/google_refine_api.rb +99 -0
  2. metadata +56 -0
@@ -0,0 +1,99 @@
1
+ require "rest_client"
2
+ require "json"
3
+
4
# Handle to a project that has been created on a Refine server.
#
# Pairs the server-side project id with the Refine object it belongs to.
class Project
  # refine - the object the project was created through (must respond to #uri).
  # id     - the project id as reported by the server.
  #
  # Both are exposed read-only so callers can use the id directly instead of
  # having to pick it out of the URL produced by #to_s.
  attr_reader :refine, :id

  def initialize(refine, id)
    @refine = refine
    @id = id
  end

  # Returns the "<refine uri>/project?project=<id>" URL for this project.
  def to_s
    "#{@refine.uri}/project?project=#{@id}"
  end
end
14
+
15
# An importing job on a Refine server: upload raw data, then turn it into a
# project.
#
# All requests are POSTed with RestClient against the URI of the Refine object
# the job was created through.
class Job
  # Raised when the server reports that the importing job failed or that no
  # status is available for it. Without this the polling loops below would
  # never terminate for a failed job.
  class ImportError < StandardError; end

  # Seconds to wait between two status polls.
  POLL_INTERVAL = 2

  # Importer format sent with the create-project sub-command by default.
  DEFAULT_FORMAT = "text/line-based/*sv".freeze

  # refine - object responding to #uri (base URL of the Refine server).
  # id     - importing job id handed out by the server.
  def initialize(refine, id)
    @refine = refine
    @id = id
  end

  # Uploads the file at path +upload+ and blocks until the job state is
  # "ready".
  #
  # Raises ImportError if the job reports state "error" or the status response
  # carries no job configuration.
  def load_raw_data(upload)
    # Block form so the handle is closed as soon as the upload request returns.
    File.open(upload, "rb") do |file|
      RestClient.post(controller_url("load-raw-data"), :upload => file)
    end

    loop do
      sleep POLL_INTERVAL
      status = fetch_status
      warn status
      break if job_config(status)["state"] == "ready"
    end
  end

  # Asks the server to create a project from the uploaded data and blocks
  # until the job status carries a project id.
  #
  # options - Hash of importer options; sent as JSON.
  # format  - importer format identifier (defaults to DEFAULT_FORMAT).
  #
  # Returns a Project for the new project id.
  # Raises ImportError if the job reports state "error" or the status response
  # carries no job configuration.
  def create_project(options, format = DEFAULT_FORMAT)
    RestClient.post(controller_url("create-project"),
                    :format => format,
                    :options => options.to_json)

    project_id = nil
    while project_id.nil?
      sleep POLL_INTERVAL
      project_id = job_config(fetch_status)["projectID"]
    end

    Project.new(@refine, project_id)
  end

  # Cancels the importing job on the server and echoes the response to stderr.
  def cancel
    warn RestClient.post("#{@refine.uri}/command/core/cancel-importing-job?jobID=#{@id}", nil)
  end

  private

  # URL of the default importing controller for this job and +sub_command+.
  def controller_url(sub_command)
    "#{@refine.uri}/command/core/importing-controller" \
      "?controller=core%2Fdefault-importing-controller&jobID=#{@id}&subCommand=#{sub_command}"
  end

  # Fetches the raw status response for this job.
  def fetch_status
    RestClient.post("#{@refine.uri}/command/core/get-importing-job-status?jobID=#{@id}", nil)
  end

  # Parses a status response and returns its job configuration hash.
  #
  # NOTE(review): the "error" state and the "error"/"errors"/"message" keys
  # follow what OpenRefine's importing job appears to report - confirm against
  # the Refine version in use.
  def job_config(status)
    parsed = JSON[status]
    config = parsed["job"] && parsed["job"]["config"]

    if config.nil?
      reason = parsed["message"] || status
      raise ImportError, "importing job #{@id}: no job status available (#{reason})"
    end

    if config["state"] == "error"
      reason = config["error"] || config["errors"].inspect
      raise ImportError, "importing job #{@id} failed: #{reason}"
    end

    config
  end
end
53
+
54
# Entry point of the API: a Refine server reachable at +uri+.
class Refine
  # Base URL of the Refine server, without a trailing slash (paths starting
  # with "/command/..." are appended directly).
  attr_accessor :uri

  def initialize(uri)
    @uri = uri
  end

  # Asks the server for a new importing job.
  #
  # The raw response is echoed to stderr. Returns a Job bound to this server
  # and the "jobID" found in the response.
  def create_importing_job
    response = RestClient.post("#{@uri}/command/core/create-importing-job", nil)
    job_id = JSON[response]["jobID"]
    warn response
    Job.new(self, job_id)
  end

  # Uploads the file at path +upload+ and creates a project from it.
  #
  # param - optional Hash; param[:options] is a Hash of importer options that
  #         override the defaults below. The caller's hash is not modified.
  #         NOTE: param[:format] is accepted but not forwarded; the importer
  #         format is whatever Job#create_project uses by default.
  #
  # Returns whatever Job#create_project returns (the created project).
  # The importing job is cancelled afterwards in every case, including when an
  # exception is raised part-way through.
  def create_project(upload, param = {})
    default_options = {
      :"encoding" => "",
      :"separator" => "\\t", # backslash + "t" on purpose: sent in escaped form
      :"ignoreLines" => -1,
      :"headerLines" => 0,
      :"skipDataLines" => 0,
      :"limit" => 1000000,
      :"storeBlankRows" => true,
      :"guessCellValueTypes" => true,
      :"processQuotes" => false,
      :"storeBlankCellsAsNulls" => true,
      :"includeFileSources" => false,
      :"projectName" => "Uploaded by Google Refine API"
    }

    # Read the overrides without writing back into the caller's hash.
    options = default_options.update(param[:options] || {})

    job = create_importing_job
    job.load_raw_data(upload)
    job.create_project(options)
  ensure
    # job is nil when create_importing_job itself failed.
    job.cancel if job
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_refine_api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Cheng Guang-Nan
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-01-10 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rest-client
16
+ requirement: &13258100 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *13258100
25
+ description:
26
+ email: guangnan@chengguangnan.com
27
+ executables: []
28
+ extensions: []
29
+ extra_rdoc_files: []
30
+ files:
31
+ - lib/google_refine_api.rb
32
+ homepage: https://github.com/guangnan/google_refine_api
33
+ licenses: []
34
+ post_install_message:
35
+ rdoc_options: []
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ none: false
46
+ requirements:
47
+ - - ! '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubyforge_project:
52
+ rubygems_version: 1.8.10
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: Upload files to refine programmatically.
56
+ test_files: []