crawlab_ruby_sdk 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c85282caf96edf6a9100a70b045bb1004a471db9d2a7842f83e77a28fbde32e
4
- data.tar.gz: cc64ce0004eeede08da51a5a7eec294f0db1b494d897079562a1578128960a1e
3
+ metadata.gz: c5c857dd2f243d47318786c542faec1a9fc7a6a52bba0edf7523313379b4f2d4
4
+ data.tar.gz: fbbacb701ad2bd5d090d85ba3322cd82a5c02f5d7d0d3ab729ee2b2b0d0882c1
5
5
  SHA512:
6
- metadata.gz: 012211e75c7de44e0493c931ce3d6d4cd8f1351da52ae4bd77da3aca95e39294d3c4c9e2c5cf2d5ebd877dcb1dbd2e45bc9f9b8ce2199ffdf3d71c3f6de517a4
7
- data.tar.gz: 380d04dddd44c608e80eebc23f601f23acaacc08fe291e6a711749e65b00d7aef664a8d30a0a0537bfec82c4e02620da3dfdbcf259bd1bd6aecc2bebaaf821d2
6
+ metadata.gz: 0f9752ef0ad2e59d72fa870a6493a8d831716c0422bb4d6d08a0afc3cadc08ee9b4b80eb97af5192ab1094ad7884533b3c7c75cc76d1a47fe7c0b90f991538d0
7
+ data.tar.gz: 47e6a0373338ad0d32b8286d2b422da55a67d5728655fca51a028a17928ee301b981d17cb396e06d23f8911ac37663b471360f8ecce97e55b7f8595e4e8096c1
data/README.md CHANGED
@@ -39,6 +39,19 @@ Or install it yourself as:
39
39
  puts bucket_url
40
40
  ```
41
41
 
42
+ ##### 字段验证规则
43
+
44
+ ```
45
+ CrawlabRubySdk.save_item({name: "haha",age:12},table_name)
46
+ CrawlabRubySdk.save_items([{name: "haha",age:12},{name:"456",age:34}],table_name)
47
+ table_name 可以是这几个["thinktank_expert_reports", "thinktank_informations", "thinktank_experts", "thinktank_reports"] 中的一个,也可以不传递
48
+
49
+ 1、如果当前采集的结果存储的数据表是上面几个中的一个,无论是否传递table_name,都会按照对应表结构的字段规则验证字段
50
+ 2、如果当前采集的结果存储的数据表不属于上面几个中的一个,则按照传递的table_name对应的字段规则验证字段
51
+ 3、如果没有传递table_name,并且采集的结果存储到临时表,则根据推送结果的字段匹配上面四个表的字段,匹配上了就按照匹配上的表结构的字段规则验证,没有匹配上则不验证
52
+ ```
53
+
54
+
42
55
  ## Development
43
56
 
44
57
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,3 +1,3 @@
1
1
  module CrawlabRubySdk
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
@@ -36,6 +36,7 @@ module CrawlabRubySdk
36
36
  auth = "Crawlab2021!"
37
37
  end
38
38
 
39
+ table_name = get_table_name(table_name)
39
40
  if !Verify.IsVerified?([item],table_name)
40
41
  return
41
42
  end
@@ -57,7 +58,9 @@ module CrawlabRubySdk
57
58
  if auth==nil || auth == ""
58
59
  auth = "Crawlab2021!"
59
60
  end
60
- if !Verify.IsVerified?([item],table_name)
61
+
62
+ table_name = get_table_name(table_name)
63
+ if !Verify.IsVerified?(items,table_name)
61
64
  return
62
65
  end
63
66
 
@@ -99,10 +102,11 @@ module CrawlabRubySdk
99
102
 
100
103
  data = {task_id: task_id,data:records}.to_json.b
101
104
 
105
+ # puts "=====#{{task_id: task_id,data:records}.to_json}==="
106
+
102
107
  msg = Grpc::StreamMessage.new(code:3,data:data)
103
- puts data
104
108
 
105
- # sub_client.Send([msg])
109
+ sub_client.Send([msg])
106
110
  end
107
111
 
108
112
  def self.get_task_id
@@ -120,4 +124,14 @@ module CrawlabRubySdk
120
124
  def self.save_file_stream_to_oss(oss_path,stream)
121
125
  OssServerClient.new.send_stream(oss_path,stream)
122
126
  end
127
+
128
+ def self.get_table_name(table_name="")
129
+ table_names = Verify.table_names
130
+ if ENV["TABLE_NAME"] != nil && ENV["TABLE_NAME"] != "" && table_names.include?(ENV["TABLE_NAME"])
131
+ table_name = ENV["TABLE_NAME"]
132
+ return table_name
133
+ end
134
+
135
+ return table_name
136
+ end
123
137
  end
data/lib/models/base.rb CHANGED
@@ -85,6 +85,7 @@ class BaseModel
85
85
  rescue StandardError => e
86
86
  return false
87
87
  end
88
+ return true
88
89
  end
89
90
 
90
91
  def verify_string(v)
@@ -161,7 +162,7 @@ class BaseModel
161
162
  return false
162
163
  end
163
164
  fields_arr = fn.split(":")
164
- if length_arr.size < 2
165
+ if fields_arr.size < 2
165
166
  return false
166
167
  end
167
168
  fields_str = fields_arr[1]
@@ -169,15 +170,17 @@ class BaseModel
169
170
  fields = fields_str.split(",")
170
171
 
171
172
  begin
172
- datas = json.parse(v)
173
+ datas = JSON.parse(v)
173
174
  datas.each do |data|
174
175
  fields.each do |field|
175
176
  if data[field] == nil
177
+ puts "ERROR #{field} not Exist!"
176
178
  return false
177
179
  end
178
180
  end
179
181
  end
180
182
  rescue StandardError => e
183
+ puts e
181
184
  return false
182
185
  end
183
186
 
@@ -41,7 +41,40 @@ class ThinktankExpert < BaseModel
41
41
 
42
42
 
43
43
  def initialize(options={})
44
-
44
+ options = JSON.parse(options.to_json)
45
+ @id = options["id"]
46
+ @name = options["name"]
47
+ @title = options["title"]
48
+ @content = options["content"]
49
+ @location = options["location"]
50
+ @area_of_expertise = options["area_of_expertise"]
51
+ @profile_images = options["profile_images"]
52
+ @phone = options["phone"]
53
+ @email = options["email"]
54
+ @link = options["link"]
55
+ @audios = options["audios"]
56
+ @videos = options["videos"]
57
+ @education = options["education"]
58
+ @related_topics = options["related_topics"]
59
+ @site_name = options["site_name"]
60
+ @site_name_cn = options["site_name_cn"]
61
+ @domain = options["domain"]
62
+ @created_at = options["created_at"]
63
+ @updated_at = options["updated_at"]
64
+ @source = options["source"]
65
+ @oss_profile_images = options["oss_profile_images"]
66
+ @facebook = options["facebook"]
67
+ @twitter = options["twitter"]
68
+ @linkedin = options["linkedin"]
69
+ @instagram = options["instagram"]
70
+ @wikidata = options["wikidata"]
71
+ @person_type = options["person_type"]
72
+ @files = options["files"]
73
+ @oss_files = options["oss_files"]
74
+ @associated_program = options["associated_program"]
75
+ @lang = options["lang"]
76
+ @website = options["website"]
77
+ @nationalities = options["nationalities"]
45
78
  end
46
79
 
47
80
  def as_json
@@ -42,7 +42,43 @@ class ThinktankExpertReport < BaseModel
42
42
  register
43
43
 
44
44
  def initialize(options={})
45
-
45
+ options = JSON.parse(options.to_json)
46
+ @id = options["id"]
47
+ @title = options["title"]
48
+ @site_name = options["site_name"]
49
+ @site_name_cn = options["site_name_cn"]
50
+ @abstract = options["abstract"]
51
+ @content = options["content"]
52
+ @author_names = options["author_names"]
53
+ @source = options["source"]
54
+ @files = options["files"]
55
+ @images = options["images"]
56
+ @videos = options["videos"]
57
+ @audios = options["audios"]
58
+ @links = options["links"]
59
+ @domain = options["domain"]
60
+ @keywords= options["keywords"]
61
+ @html_content = options["html_content"]
62
+ @lang = options["lang"]
63
+ @country_cn = options["country_cn"]
64
+ @country_code = options["country_code"]
65
+ @created_at = options["created_at"]
66
+ @updated_at = options["updated_at"]
67
+ @created_time = options["created_time"]
68
+ @oss_files = options["oss_files"]
69
+ @oss_images = options["oss_images"]
70
+ @customer_category = options["customer_category"]
71
+ @category = options["category"]
72
+ @topics = options["topics"]
73
+ @tags = options["tags"]
74
+ @views = options["views"]
75
+ @comments = options["comments"]
76
+ @reference = options["reference"]
77
+ @mention_country = options["mention_country"]
78
+ @authors = options["authors"]
79
+ @sub_title = options["sub_title"]
80
+ @timezone = options["timezone"]
81
+ @timezone_location = options["timezone_location"]
46
82
  end
47
83
 
48
84
  def as_json
@@ -44,7 +44,44 @@ class ThinktankInformation < BaseModel
44
44
 
45
45
 
46
46
  def initialize(options={})
47
-
47
+ options = JSON.parse(options.to_json)
48
+ @id = options["id"]
49
+ @title = options["title"]
50
+ @site_name = options["site_name"]
51
+ @site_name_cn = options["site_name_cn"]
52
+ @abstract = options["abstract"]
53
+ @content = options["content"]
54
+ @author_names = options["author_names"]
55
+ @state_info = options["state_info"]
56
+ @source = options["source"]
57
+ @files = options["files"]
58
+ @images = options["images"]
59
+ @videos = options["videos"]
60
+ @audios = options["audios"]
61
+ @links = options["links"]
62
+ @domain = options["domain"]
63
+ @keywords = options["keywords"]
64
+ @html_content = options["html_content"]
65
+ @lang = options["lang"]
66
+ @country_cn = options["country_cn"]
67
+ @country_code = options["country_code"]
68
+ @created_at = options["created_at"]
69
+ @updated_at = options["updated_at"]
70
+ @created_time = options["created_time"]
71
+ @oss_files = options["oss_files"]
72
+ @oss_images = options["oss_images"]
73
+ @customer_category = options["customer_category"]
74
+ @category = options["category"]
75
+ @topics = options["topics"]
76
+ @tags = options["tags"]
77
+ @views = options["views"]
78
+ @comments = options["comments"]
79
+ @reference = options["reference"]
80
+ @mention_country = options["mention_country"]
81
+ @authors = options["authors"]
82
+ @sub_title = options["sub_title"]
83
+ @timezone = options["timezone"]
84
+ @timezone_location = options["timezone_location"]
48
85
  end
49
86
 
50
87
  def as_json
@@ -43,7 +43,43 @@ class ThinktankReport < BaseModel
43
43
 
44
44
 
45
45
  def initialize(options = {})
46
-
46
+ options = JSON.parse(options.to_json)
47
+ @id = options["id"]
48
+ @title = options["title"]
49
+ @site_name = options["site_name"]
50
+ @site_name_cn = options["site_name_cn"]
51
+ @abstract = options["abstract"]
52
+ @content = options["content"]
53
+ @author_names = options["author_names"]
54
+ @source = options["source"]
55
+ @files = options["files"]
56
+ @images = options["images"]
57
+ @videos = options["videos"]
58
+ @audios = options["audios"]
59
+ @links = options["links"]
60
+ @domain = options["domain"]
61
+ @keywords = options["keywords"]
62
+ @html_content = options["html_content"]
63
+ @lang = options["lang"]
64
+ @country_cn = options["country_cn"]
65
+ @country_code = options["country_code"]
66
+ @created_at = options["created_at"]
67
+ @updated_at = options["updated_at"]
68
+ @created_time = options["created_time"]
69
+ @oss_files = options["oss_files"]
70
+ @oss_images = options["oss_images"]
71
+ @customer_category = options["customer_category"]
72
+ @category = options["category"]
73
+ @topics = options["topics"]
74
+ @tags = options["tags"]
75
+ @views = options["views"]
76
+ @comments = options["comments"]
77
+ @reference = options["reference"]
78
+ @mention_country = options["mention_country"]
79
+ @authors = options["authors"]
80
+ @sub_title = options["sub_title"]
81
+ @timezone = options["timezone"]
82
+ @timezone_location = options["timezone_location"]
47
83
  end
48
84
 
49
85
  def to_json
data/lib/verify/verify.rb CHANGED
@@ -3,6 +3,10 @@ class Verify
3
3
 
4
4
  if table_name == "" && items.size > 0
5
5
  table_name = switch_table(items[0])
6
+ elsif table_name != "" && items.size > 0
7
+ if !match_fields?(items[0],table_name)
8
+ return
9
+ end
6
10
  end
7
11
 
8
12
  if table_name == ""
@@ -50,4 +54,40 @@ class Verify
50
54
  return table_name
51
55
  end
52
56
 
57
+ def self.match_fields?(item={},table_name)
58
+ if table_name == nil || table_name == ""
59
+ return false
60
+ end
61
+ item_keys = JSON.parse(item.to_json).keys
62
+ v = $map_models[table_name]
63
+
64
+ if v == nil
65
+ puts "ERROR #{table_name} not Exist!"
66
+ return false
67
+ end
68
+
69
+ klass = Object.const_get v
70
+ klass_keys = klass.fields
71
+
72
+ # puts "----#{klass_keys}--"
73
+ sub_keys1 = item_keys - klass_keys
74
+ if sub_keys1.size > 0
75
+ puts "ERROR #{sub_keys1} do not belong #{table_name}"
76
+ return false
77
+ end
78
+
79
+ sub_keys2 = klass_keys - item_keys
80
+ if sub_keys2.size > 0
81
+ puts "ERROR #{sub_keys2} do not Exist!"
82
+ return false
83
+ end
84
+
85
+ return true
86
+
87
+ end
88
+
89
+ def self.table_names
90
+ return $map_models.keys
91
+ end
92
+
53
93
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawlab_ruby_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - min
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-10 00:00:00.000000000 Z
11
+ date: 2023-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: grpc