crawlab_ruby_sdk 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/crawlab_ruby_sdk.gemspec +7 -2
- data/lib/client/oss_server_client.rb +60 -0
- data/lib/crawlab_ruby_sdk/version.rb +1 -1
- data/lib/crawlab_ruby_sdk.rb +73 -19
- data/test_grpc_client.rb +41 -0
- metadata +66 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7ab07d443a57759aabbf8fd96fd7f8d50fd8b540dcc3c21762cb775eb43b702d
|
4
|
+
data.tar.gz: c00cbf3d27a1121b83a24eb22cb62d477529d7ab4cb9c331653d9bd77ab50e3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3cef20ce95c9d77c6288f4c8b5a371d7204d0d701a5249626b40ab77ac04c90b90184240de55c4aff8a395d50f462054c0682f8306cec19578a04e3cbdcd77bf
|
7
|
+
data.tar.gz: f60a2a02e8b81258ed58dd32d129688006af5b8cb14666458bf322dd24ec7547d479de496d16e24fc8c80ad477b085b78921abb515ce21c10428374e1f3107d5
|
data/crawlab_ruby_sdk.gemspec
CHANGED
@@ -6,11 +6,16 @@ Gem::Specification.new do |spec|
|
|
6
6
|
spec.authors = ["min"]
|
7
7
|
spec.email = ["lijinmin3903@126.com"]
|
8
8
|
|
9
|
-
spec.summary = %q{
|
10
|
-
spec.description = %q{
|
9
|
+
spec.summary = %q{Write a short summary, because RubyGems requires one.}
|
10
|
+
spec.description = %q{Write a longer description or delete this line.}
|
11
11
|
spec.homepage = "https://github.com/rich-bro/crawlab_ruby_sdk"
|
12
12
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
13
13
|
|
14
|
+
spec.add_dependency 'grpc','~> 1.55.0'
|
15
|
+
spec.add_dependency 'google-protobuf','~> 3.23.2'
|
16
|
+
spec.add_dependency 'json','~> 2.6.3'
|
17
|
+
spec.add_dependency 'aliyun-sdk','~> 0.8.0'
|
18
|
+
|
14
19
|
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
15
20
|
|
16
21
|
spec.metadata["homepage_uri"] = spec.homepage
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'aliyun/oss'
|
2
|
+
|
3
|
+
# Thin wrapper around the Aliyun OSS client, configured entirely from the
# CRAWLAB_OSS_ENDPOINT, CRAWLAB_OSS_ACCESS_KEY, CRAWLAB_OSS_SECRET and
# CRAWLAB_OSS_BUCKET environment variables.
#
# When any of those variables is missing or empty, no client and no bucket are
# created: both accessors stay nil and the upload methods return the string
# "bucket is nil" instead of raising.
class OssServerClient
  attr_accessor :oss_client
  attr_accessor :bucket

  # Builds the OSS client and resolves the configured bucket (both may end up nil).
  def initialize
    get_oss_client
    get_bucket
  end

  # Creates @oss_client from the environment.
  #
  # Returns the new client, or nil (leaving @oss_client untouched) when any of
  # the four CRAWLAB_OSS_* variables is unset or empty.
  def get_oss_client
    endpoint = ENV["CRAWLAB_OSS_ENDPOINT"]
    access_key_id = ENV["CRAWLAB_OSS_ACCESS_KEY"]
    access_key_secret = ENV["CRAWLAB_OSS_SECRET"]
    bucket_name = ENV["CRAWLAB_OSS_BUCKET"]

    required = [endpoint, access_key_id, access_key_secret, bucket_name]
    return if required.any? { |value| value.nil? || value.empty? }

    @oss_client = Aliyun::OSS::Client.new(
      :endpoint => endpoint,
      :access_key_id => access_key_id,
      :access_key_secret => access_key_secret)
  end

  # Resolves @bucket from CRAWLAB_OSS_BUCKET using @oss_client.
  #
  # Returns nil without touching @bucket when the bucket name is unset/empty or
  # when no client could be built (previously the latter raised NoMethodError
  # on nil if only the bucket variable was set).
  def get_bucket
    bucket_name = ENV["CRAWLAB_OSS_BUCKET"]
    return if bucket_name.nil? || bucket_name.empty?
    return if @oss_client.nil?

    @bucket = @oss_client.get_bucket(bucket_name)
  end

  # Uploads the local file at +file_path+ to +oss_path+ in the bucket.
  #
  # Returns the object's URL, or the string "bucket is nil" when the client is
  # not configured.
  #
  # NOTE: this method shadows Object#send on instances of this class; use
  # __send__ if dynamic dispatch is ever needed on an OssServerClient.
  def send(oss_path, file_path)
    return "bucket is nil" if bucket.nil? || @oss_client.nil?

    bucket.put_object(oss_path, :file => file_path)
    bucket.object_url(oss_path)
  end

  # Uploads in-memory content (+stream+) to +oss_path+ in the bucket.
  #
  # Returns the object's URL, or the string "bucket is nil" when the client is
  # not configured (same contract as #send).
  def send_stream(oss_path, stream)
    return "bucket is nil" if bucket.nil? || @oss_client.nil?

    bucket.put_object(oss_path) { |writer| writer << stream }
    bucket.object_url(oss_path)
  end
end
|
data/lib/crawlab_ruby_sdk.rb
CHANGED
@@ -9,21 +9,21 @@ def traverse_dir(file_path)
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
|
12
|
+
dir = File.expand_path("..", __FILE__)
|
13
13
|
|
14
|
-
# puts
|
14
|
+
# puts dir
|
15
15
|
|
16
|
-
traverse_dir(
|
17
|
-
traverse_dir(
|
18
|
-
traverse_dir(
|
19
|
-
traverse_dir(
|
20
|
-
traverse_dir(
|
16
|
+
traverse_dir(dir+'/entity/stream_message_code_pb.rb')
|
17
|
+
traverse_dir(dir+'/entity/result_pb.rb')
|
18
|
+
traverse_dir(dir+'/entity/stream_message_pb.rb')
|
19
|
+
traverse_dir(dir+'/entity/stream_message_data_task_pb.rb')
|
20
|
+
traverse_dir(dir+'/client')
|
21
21
|
|
22
22
|
module CrawlabRubySdk
|
23
23
|
class Error < StandardError; end
|
24
24
|
# Your code goes here...
|
25
25
|
|
26
|
-
|
26
|
+
|
27
27
|
def self.save_item(item={})
|
28
28
|
address = ENV["CRAWLAB_GRPC_ADDRESS"]
|
29
29
|
if address==nil || address == ""
|
@@ -39,22 +39,76 @@ module CrawlabRubySdk
|
|
39
39
|
|
40
40
|
sub_client = client.subscribe
|
41
41
|
|
42
|
+
save(sub_client,[item])
|
43
|
+
end
|
44
|
+
|
45
|
+
# Saves a list of result items through the Crawlab gRPC task service.
#
# The server address comes from CRAWLAB_GRPC_ADDRESS (default "localhost:9666")
# and the auth key from CRAWLAB_GRPC_AUTH_KEY (default "Crawlab2021!").
def self.save_items(items=[])
  grpc_address = ENV["CRAWLAB_GRPC_ADDRESS"]
  grpc_address = "localhost:9666" if grpc_address.nil? || grpc_address.empty?

  auth_key = ENV["CRAWLAB_GRPC_AUTH_KEY"]
  auth_key = "Crawlab2021!" if auth_key.nil? || auth_key.empty?

  subscription = TaskServiceClient.new(grpc_address, auth_key).subscribe

  save(subscription, items)
end
|
62
|
+
|
63
|
+
# Sends +items+ over +sub_client+ in batches of at most 50 items.
#
# Each item is sent exactly once. (The previous implementation never cleared
# its accumulator, so after the 50th item every further item re-sent the whole
# growing list, and the final flush sent everything again.)
#
# Returns the result of the last batch's _save call, or nil when +items+ is empty.
def self.save(sub_client, items = [])
  last_result = nil

  items.each_slice(50) do |batch|
    last_result = _save(sub_client, batch)
  end

  last_result
end
|
79
|
+
|
80
|
+
# Tags every item with the current task id and sends them as one stream message.
#
# Each item gets a "_tid" key set in place (the caller's objects are mutated).
# The payload is the JSON form of {task_id:, data:} wrapped in a
# Grpc::StreamMessage with code 3 (presumably the "insert data" message code;
# confirm against the stream message code definitions).
#
# Returns nil without sending when no task id is available.
def self._save(sub_client,items=[])
  tid = get_task_id
  return if tid.nil? || tid == ""

  tagged = items.map do |entry|
    entry["_tid"] = tid
    entry
  end

  payload = { task_id: tid, data: tagged }.to_json
  message = Grpc::StreamMessage.new(code: 3, data: payload)

  sub_client.Send([message])
end
|
98
|
+
|
99
|
+
def self.get_task_id
|
42
100
|
task_id = ENV["CRAWLAB_TASK_ID"]
|
43
101
|
if task_id == nil || task_id == ""
|
44
102
|
task_id = "6486e256fa1cb07a47c09adf"
|
45
103
|
end
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
data = Grpc::StreamMessageDataTask.new()
|
50
|
-
# puts item
|
104
|
+
return task_id
|
105
|
+
end
|
51
106
|
|
52
|
-
|
53
|
-
|
107
|
+
# Uploads the local file at +file_path+ to +oss_path+ using a freshly built
# OssServerClient, and returns whatever OssServerClient#send returns.
def self.save_file_to_oss(oss_path,file_path)
  uploader = OssServerClient.new
  uploader.send(oss_path, file_path)
end
|
54
110
|
|
55
|
-
|
56
|
-
|
57
|
-
puts msg
|
58
|
-
sub_client.Send([msg])
|
111
|
+
# Uploads in-memory content (+stream+) to +oss_path+ using a freshly built
# OssServerClient, and returns whatever OssServerClient#send_stream returns.
def self.save_file_stream_to_oss(oss_path,stream)
  uploader = OssServerClient.new
  uploader.send_stream(oss_path, stream)
end
|
60
114
|
end
|
data/test_grpc_client.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
require 'crawlab_ruby_sdk'
|
4
|
+
require 'rest-client'
|
5
|
+
# Manual smoke test for the SDK. The active part downloads one object from the
# configured OSS bucket; the commented-out snippets show the other entry points.
#
# The CRAWLAB_OSS_* values below are blank placeholders and must be filled in
# before running, otherwise no bucket can be resolved.
def main
  # Saving results over gRPC:
  # CrawlabRubySdk.save_item({name: "haha",age:12})
  # CrawlabRubySdk.save_items([{name: "haha",age:12},{name:"456",age:34}])

  # Dumping the environment:
  # ENV.each do |k,v|
  #   puts "#{k}=#{v}"
  # end

  # Uploading a local file:
  # oss_path = "thinkthank_files/files/1123123123.pdf"
  # file_path = "/home/min/Downloads/RAND_RRA1218-2.pdf"
  # bucket_url = CrawlabRubySdk.save_file_to_oss(oss_path,file_path)
  # puts bucket_url

  # Uploading downloaded content:
  # oss_path = "thinkthank_files/files/456.pdf"
  # res = RestClient.get("https://www.rand.org/content/dam/rand/pubs/research_reports/RRA1200/RRA1218-2/RAND_RRA1218-2.pdf")
  # stream = res.body
  # file = File.open("/home/min/Downloads/aa.pdf","w")
  # file.write(stream)
  # file.close
  # bucket_url = CrawlabRubySdk.save_file_stream_to_oss(oss_path,stream)
  # puts bucket_url

  ENV["CRAWLAB_OSS_ENDPOINT"] = ""
  ENV["CRAWLAB_OSS_ACCESS_KEY"] = ""
  ENV["CRAWLAB_OSS_SECRET"] = ""
  ENV["CRAWLAB_OSS_BUCKET"] = ""

  a = OssServerClient.new

  # With blank settings the client leaves its bucket nil; fail with a clear
  # message instead of a NoMethodError on nil.
  if a.bucket.nil?
    raise "OSS bucket is not configured: fill in the CRAWLAB_OSS_* values in this script"
  end

  a.bucket.get_object("thinkthank_files/files/789.pdf", :file => "/home/min/Downloads/2-2.pdf")
end
|
40
|
+
|
41
|
+
main
|
metadata
CHANGED
@@ -1,16 +1,72 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawlab_ruby_sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- min
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
12
|
-
dependencies:
|
13
|
-
|
11
|
+
date: 2023-06-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: grpc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.55.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.55.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: google-protobuf
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.23.2
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.23.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: json
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.6.3
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.6.3
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: aliyun-sdk
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.8.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.8.0
|
69
|
+
description: Write a longer description or delete this line.
|
14
70
|
email:
|
15
71
|
- lijinmin3903@126.com
|
16
72
|
executables: []
|
@@ -24,6 +80,7 @@ files:
|
|
24
80
|
- bin/console
|
25
81
|
- bin/setup
|
26
82
|
- crawlab_ruby_sdk.gemspec
|
83
|
+
- lib/client/oss_server_client.rb
|
27
84
|
- lib/client/task_service_client.rb
|
28
85
|
- lib/client/task_service_subscribe_client.rb
|
29
86
|
- lib/crawlab_ruby_sdk.rb
|
@@ -39,6 +96,7 @@ files:
|
|
39
96
|
- lib/entity/stream_message_pb.rb
|
40
97
|
- lib/models/node_pb.rb
|
41
98
|
- lib/models/task_pb.rb
|
99
|
+
- test_grpc_client.rb
|
42
100
|
homepage: https://github.com/rich-bro/crawlab_ruby_sdk
|
43
101
|
licenses: []
|
44
102
|
metadata:
|
@@ -46,7 +104,7 @@ metadata:
|
|
46
104
|
homepage_uri: https://github.com/rich-bro/crawlab_ruby_sdk
|
47
105
|
source_code_uri: https://github.com/rich-bro/crawlab_ruby_sdk
|
48
106
|
changelog_uri: https://github.com/rich-bro/crawlab_ruby_sdk
|
49
|
-
post_install_message:
|
107
|
+
post_install_message:
|
50
108
|
rdoc_options: []
|
51
109
|
require_paths:
|
52
110
|
- lib
|
@@ -62,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
62
120
|
version: '0'
|
63
121
|
requirements: []
|
64
122
|
rubygems_version: 3.1.4
|
65
|
-
signing_key:
|
123
|
+
signing_key:
|
66
124
|
specification_version: 4
|
67
|
-
summary:
|
125
|
+
summary: Write a short summary, because RubyGems requires one.
|
68
126
|
test_files: []
|