google_refine_api 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/google_refine_api.rb +99 -0
- metadata +56 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
require "rest_client"
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
# A project that lives on a Refine server.
#
# Instances are plain value holders: they remember which server object they
# belong to and the project id reported by that server.
class Project
  # The project id this object was constructed with.
  attr_reader :id

  # refine - object responding to #uri (the base address of the server).
  # id     - identifier of the project on that server.
  def initialize(refine, id)
    @refine = refine
    @id = id
  end

  # Returns the address of the project page, built from the server's base
  # URI and the project id.
  def to_s
    "#{@refine.uri}/project?project=#{@id}"
  end
end
|
14
|
+
|
15
|
+
# Handle to one importing job on a Refine server.
#
# The job is driven in two steps: #load_raw_data uploads a file and waits for
# the server to report the job as ready, then #create_project turns the
# uploaded data into a project. #cancel asks the server to drop the job.
class Job
  # Importer format sent by #create_project when the caller does not pick one.
  DEFAULT_FORMAT = "text/line-based/*sv"

  # Seconds slept before every status request while polling.
  POLL_INTERVAL = 2

  # refine - object responding to #uri (the base address of the server).
  # id     - job identifier handed out by the server.
  def initialize(refine, id)
    @refine = refine
    @id = id
  end

  # Uploads the file at path +upload+ and blocks until the job status reports
  # the state "ready".
  #
  # Raises RuntimeError when the status reports the state "error".
  # Returns nil.
  def load_raw_data(upload)
    # Block form so the file handle is closed even when the upload raises.
    File.open(upload, "rb") do |file|
      RestClient.post(importing_controller_url("load-raw-data"), :upload => file)
    end

    loop do
      sleep POLL_INTERVAL
      status = RestClient.post(status_url, nil)
      warn status
      config = job_config(status)
      break if config["state"] == "ready"

      raise_if_failed(config)
    end
  end

  # Asks the server to create a project from the previously uploaded data and
  # blocks until the job status carries a project id.
  #
  # options - object responding to #to_json; sent as the importer options.
  # format  - importer format identifier (defaults to DEFAULT_FORMAT).
  #
  # Raises RuntimeError when the status reports the state "error".
  # Returns a Project for the new project id.
  def create_project(options, format = DEFAULT_FORMAT)
    RestClient.post(importing_controller_url("create-project"),
                    :format => format,
                    :options => options.to_json)

    loop do
      sleep POLL_INTERVAL
      config = job_config(RestClient.post(status_url, nil))
      project_id = config["projectID"]
      return Project.new(@refine, project_id) unless project_id.nil?

      raise_if_failed(config)
    end
  end

  # Asks the server to cancel this job and prints the response on stderr.
  def cancel
    warn RestClient.post("#{@refine.uri}/command/core/cancel-importing-job?jobID=#{@id}", nil)
  end

  private

  # Address of the default importing controller for this job and sub-command.
  def importing_controller_url(sub_command)
    "#{@refine.uri}/command/core/importing-controller" \
      "?controller=core%2Fdefault-importing-controller&jobID=#{@id}&subCommand=#{sub_command}"
  end

  # Address of the status command for this job.
  def status_url
    "#{@refine.uri}/command/core/get-importing-job-status?jobID=#{@id}"
  end

  # Extracts the job configuration hash from a status response body.
  def job_config(response)
    JSON.parse(response)["job"]["config"]
  end

  # Stops polling when the server marks the job as failed instead of waiting
  # forever for a state that will never arrive.
  # NOTE(review): relies on the server using the state name "error" for failed
  # jobs - confirm against the server version in use.
  def raise_if_failed(config)
    return unless config["state"] == "error"

    raise "Refine importing job #{@id} failed: #{config.to_json}"
  end
end
|
53
|
+
|
54
|
+
# Client for a Refine server reachable at a base URI.
class Refine
  # Importer options used by #create_project unless overridden by the caller.
  DEFAULT_IMPORT_OPTIONS = {
    :"encoding" => "",
    # Two characters (backslash + t), not a literal tab.
    # NOTE(review): presumably un-escaped into a tab by the server - confirm.
    :"separator" => "\\t",
    :"ignoreLines" => -1,
    :"headerLines" => 0,
    :"skipDataLines" => 0,
    :"limit" => 1000000,
    :"storeBlankRows" => true,
    :"guessCellValueTypes" => true,
    :"processQuotes" => false,
    :"storeBlankCellsAsNulls" => true,
    :"includeFileSources" => false,
    :"projectName" => "Uploaded by Google Refine API"
  }.freeze

  # Base address of the server, without a trailing path.
  attr_accessor :uri

  # uri - base address of the server; it is prefixed to every command path.
  def initialize(uri)
    @uri = uri
  end

  # Asks the server for a new importing job, prints the raw response on
  # stderr and returns a Job bound to the reported job id.
  def create_importing_job
    response = RestClient.post("#{@uri}/command/core/create-importing-job", nil)
    job_id = JSON.parse(response)["jobID"]
    warn response
    Job.new(self, job_id)
  end

  # Uploads the file at path +upload+ and creates a project from it.
  #
  # upload - path of the file to upload.
  # param  - hash; param[:options] may hold importer options that override
  #          DEFAULT_IMPORT_OPTIONS. Option names may be given as symbols or
  #          strings. The hash passed in is not modified.
  #
  # NOTE(review): param[:format] is not forwarded; the original code read it
  # into an unused local, so the importer format is whatever
  # Job#create_project uses.
  #
  # Returns whatever Job#create_project returns. The importing job is
  # cancelled afterwards in every case, including when a step raises.
  def create_project(upload, param = {})
    overrides = param[:options] || {}

    # Normalise names to symbols so a string-keyed override replaces the
    # default instead of producing the same JSON key twice.
    options = DEFAULT_IMPORT_OPTIONS.dup
    overrides.each { |name, value| options[name.to_sym] = value }

    job = create_importing_job
    job.load_raw_data(upload)
    job.create_project(options)
  ensure
    # job is nil when create_importing_job itself raised.
    job.cancel if job
  end
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: google_refine_api
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Cheng Guang-Nan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-10 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rest-client
|
16
|
+
requirement: &13258100 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *13258100
|
25
|
+
description:
|
26
|
+
email: guangnan@chengguangnan.com
|
27
|
+
executables: []
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files: []
|
30
|
+
files:
|
31
|
+
- lib/google_refine_api.rb
|
32
|
+
homepage: https://github.com/guangnan/google_refine_api
|
33
|
+
licenses: []
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
45
|
+
none: false
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 1.8.10
|
53
|
+
signing_key:
|
54
|
+
specification_version: 3
|
55
|
+
summary: Upload files to refine programmatically.
|
56
|
+
test_files: []
|