google_refine_api 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/google_refine_api.rb +99 -0
- metadata +56 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
require "rest_client"
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
# Handle to a project that lives on a Refine server.
#
# The object only remembers which client it belongs to and the project id;
# constructing it or calling its methods performs no HTTP request.
class Project
  # refine - the client object passed to the constructor.
  # id     - the project identifier passed to the constructor.
  attr_reader :refine, :id

  # refine - object responding to +uri+ with the server base URL.
  # id     - project identifier; interpolated verbatim into the URL built
  #          by #to_s.
  def initialize(refine, id)
    @refine = refine
    @id = id
  end

  # Returns the URL of the project page as a String, built from the
  # client's +uri+ and the project id.
  def to_s
    "#{@refine.uri}/project?project=#{@id}"
  end
end
|
14
|
+
|
15
|
+
# One importing job on a Refine server.
#
# A job is created by Refine#create_importing_job, fed with a file through
# #load_raw_data, turned into a project with #create_project and finally
# disposed of with #cancel.
class Job
  # Raised when the status endpoint reports a failed job or answers with a
  # payload that carries no job configuration.
  class ImportError < StandardError; end

  # Format sent with the create-project command when the caller names none.
  DEFAULT_FORMAT = "text/line-based/*sv"

  # Seconds to wait between two status requests.
  POLL_INTERVAL = 2

  # refine - object responding to +uri+ with the server base URL.
  # id     - job identifier as returned by the server.
  def initialize(refine, id)
    @refine = refine
    @id = id
  end

  # Uploads the file at path +upload+ to the job and blocks until the
  # server reports the job state "ready".
  #
  # Every status response is echoed to stderr with +warn+.
  # Raises ImportError if the job ends up in the "error" state.
  # Returns nil.
  def load_raw_data(upload)
    # Block form so the file handle is closed even when the POST raises.
    File.open(upload, "rb") do |io|
      RestClient.post(importing_controller_url("load-raw-data"), :upload => io)
    end

    wait_for_config(true) { |config| config["state"] == "ready" }
    nil
  end

  # Asks the server to build a project from the uploaded data and blocks
  # until the job status carries a project id.
  #
  # options - Hash of importer options; sent as JSON.
  # format  - importer format name; nil falls back to DEFAULT_FORMAT.
  #
  # Raises ImportError if the job ends up in the "error" state.
  # Returns a Project bound to the same client.
  def create_project(options, format = DEFAULT_FORMAT)
    RestClient.post(importing_controller_url("create-project"),
                    :format => format || DEFAULT_FORMAT,
                    :options => options.to_json)

    config = wait_for_config { |candidate| !candidate["projectID"].nil? }
    Project.new(@refine, config["projectID"])
  end

  # Cancels the job on the server and echoes the response to stderr.
  def cancel
    warn RestClient.post("#{@refine.uri}/command/core/cancel-importing-job?jobID=#{@id}", nil)
  end

  private

  # URL of the default importing controller for this job and +sub_command+.
  def importing_controller_url(sub_command)
    "#{@refine.uri}/command/core/importing-controller?controller=core%2Fdefault-importing-controller&jobID=#{@id}&subCommand=#{sub_command}"
  end

  # Polls the job status until the block accepts the job's config Hash and
  # returns that Hash. The first request is sent immediately; the pause
  # only happens between unsuccessful attempts.
  #
  # echo - when true, each raw status response is passed to +warn+.
  def wait_for_config(echo = false)
    loop do
      response = RestClient.post("#{@refine.uri}/command/core/get-importing-job-status?jobID=#{@id}", nil)
      warn response if echo

      config = (JSON[response]["job"] || {})["config"]
      raise ImportError, "no job configuration in status of importing job #{@id}" if config.nil?
      return config if yield(config)

      # NOTE(review): "error" state and the "error" message key follow the
      # OpenRefine importing-job protocol as I understand it -- confirm
      # against the server version in use. Without this check a failed job
      # would be polled forever.
      if config["state"] == "error"
        raise ImportError, "importing job #{@id} failed: #{config["error"]}"
      end

      sleep POLL_INTERVAL
    end
  end
end

# Client for the HTTP command API of a Refine server.
class Refine
  # Base URL of the server, without a trailing slash.
  attr_accessor :uri

  # Importer options used by #create_project unless overridden by the
  # caller. The separator is the two-character sequence backslash + "t".
  DEFAULT_OPTIONS = {
    :encoding => "",
    :separator => "\\t",
    :ignoreLines => -1,
    :headerLines => 0,
    :skipDataLines => 0,
    :limit => 1000000,
    :storeBlankRows => true,
    :guessCellValueTypes => true,
    :processQuotes => false,
    :storeBlankCellsAsNulls => true,
    :includeFileSources => false,
    :projectName => "Uploaded by Google Refine API"
  }.freeze

  # uri - base URL of the server.
  def initialize(uri)
    @uri = uri
  end

  # Creates a new importing job on the server, echoes the response to
  # stderr and returns a Job for the reported "jobID".
  def create_importing_job
    response = RestClient.post("#{@uri}/command/core/create-importing-job", nil)
    job_id = JSON[response]["jobID"]
    warn response
    Job.new(self, job_id)
  end

  # Uploads the file at path +upload+ and creates a project from it.
  #
  # param - Hash with optional keys:
  #         :format  - importer format name (default Job::DEFAULT_FORMAT)
  #         :options - Hash merged over DEFAULT_OPTIONS
  #         The Hash is read only; it is never modified.
  #
  # The importing job is cancelled afterwards whether or not the import
  # succeeded. Returns the created Project.
  def create_project(upload, param = {})
    options = DEFAULT_OPTIONS.merge(param[:options] || {})

    job = create_importing_job
    job.load_raw_data(upload)
    job.create_project(options, param[:format])
  ensure
    job.cancel if job
  end
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: google_refine_api
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Cheng Guang-Nan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-10 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rest-client
|
16
|
+
requirement: &13258100 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *13258100
|
25
|
+
description:
|
26
|
+
email: guangnan@chengguangnan.com
|
27
|
+
executables: []
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files: []
|
30
|
+
files:
|
31
|
+
- lib/google_refine_api.rb
|
32
|
+
homepage: https://github.com/guangnan/google_refine_api
|
33
|
+
licenses: []
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
45
|
+
none: false
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 1.8.10
|
53
|
+
signing_key:
|
54
|
+
specification_version: 3
|
55
|
+
summary: Upload files to refine programmatically.
|
56
|
+
test_files: []
|