employ_me 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f276d820ba553a0fc83946daa695d492b3fd38526d8b284d42969a089c91cff3
|
|
4
|
+
data.tar.gz: 0d3feec149da139bd74b9cf1162806ca9615db06bab8a1c61a1c345b00b46504
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ca8fda64fbd28af29ab2ce1c0a3ec9b7b4d7871d5f6f092ca4689ffe862eb72489709f4b2034985e07ca2dcddf76e7597d6c6c972fb015a8b55ad133d0c76cf6
|
|
7
|
+
data.tar.gz: eed7d599ff2aca71ae428141d5b6820d27f353d5ee3aa7b66ea6f6faffbbc79fd9b9ff5e4c9b36087dcfe32b1d067114358f552b9773627b7ef685f9b602b805
|
|
@@ -25,6 +25,9 @@ module EmployMe
|
|
|
25
25
|
regex = Regexp.new('San Francisco, CA', Regexp::IGNORECASE)
|
|
26
26
|
return ['CA', 'San Francisco', 'CA'] if regex.match(curr_node_text)
|
|
27
27
|
|
|
28
|
+
regex = Regexp.new('San Francisco', Regexp::IGNORECASE)
|
|
29
|
+
return ['CA', 'San Francisco', 'CA'] if regex.match(curr_node_text)
|
|
30
|
+
|
|
28
31
|
# San Mateo, CA Jobs
|
|
29
32
|
regex = Regexp.new('San Mateo, CA', Regexp::IGNORECASE)
|
|
30
33
|
return ['CA', 'San Mateo', 'CA'] if regex.match(curr_node_text)
|
|
@@ -68,6 +68,17 @@ module EmployMe
|
|
|
68
68
|
|
|
69
69
|
return [low, high]
|
|
70
70
|
end
|
|
71
|
+
|
|
72
|
+
# Salary Format: $100,000 and up to $200,000
|
|
73
|
+
regex = Regexp.new('\\$([0-9]+),[0-9]+ and up to \\$([0-9]+),[0-9]+', Regexp::IGNORECASE)
|
|
74
|
+
result = regex.match(curr_node_text)
|
|
75
|
+
|
|
76
|
+
if result
|
|
77
|
+
low = result[1].to_i * 1000
|
|
78
|
+
high = result[2].to_i * 1000
|
|
79
|
+
|
|
80
|
+
return [low, high]
|
|
81
|
+
end
|
|
71
82
|
end
|
|
72
83
|
|
|
73
84
|
tree.concat(curr_node.children)
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
require 'set' # Set is only autoloaded on Ruby >= 3.2; load it explicitly for older runtimes.

module EmployMe
  module Parser
    module Technologies
      module Strategies
        # Detects technologies mentioned in a parsed document by scanning the
        # text of its leaf-level nodes against a fixed table of patterns.
        class PatternMatch
          # Child node names that mark a node as text-bearing: a node is
          # scanned only when every one of its children has one of these
          # names (which is also true for a node with no children at all).
          TEXT_NODE_NAMES = %w[comment text].freeze

          # Technology symbol => pattern searched for in a node's text.
          # Compiled once here instead of once per visited node.
          #
          # All patterns are unanchored, case-insensitive substring matches.
          # NOTE(review): because of that, short names can match inside
          # unrelated words (e.g. /iOS/i in "curious", /AWS/i in "laws");
          # confirm whether word boundaries are wanted before tightening.
          #
          # :css (/CSS/i) and :html (/HTML/i) detection is currently disabled.
          PATTERNS = {
            dotnet: /\.NET/i,
            airflow: /Airflow/i,
            android: /Android/i,
            aws: /AWS/i,
            docker: /Docker/i,
            flink: /Flink/i,
            firestore: /Firestore/i,
            gcp: /GCP/i,
            helm: /Helm/i,
            ios: /iOS/i,
            istio: /Istio/i,
            kafka: /Kafka/i,
            karpenter: /Karpenter/i,
            kubeflow: /Kubeflow/i,
            kubernetes: /Kubernetes/i,
            linux: /Linux/i,
            node: /Node/i,
            # /Postgres/i also covers "PostgreSQL", so one pattern is enough.
            postgresql: /Postgres/i,
            pulsar: /Pulsar/i,
            react: /React/i,
            redux: /Redux/i,
            ruby_on_rails: /Ruby on Rails/i,
            snowflake: /Snowflake/i,
            spark: /Spark/i,
            tensorflow: /Tensorflow/i,
            terraform: /Terraform/i,
            typescript: /Typescript/i
          }.freeze

          private_constant :TEXT_NODE_NAMES, :PATTERNS

          # Walks the tree under +root_node+ and collects every technology
          # whose pattern occurs in the text of a text-bearing node.
          #
          # @param root_node [#children, #name, #text] root of the tree to
          #   scan; each node must respond to +children+ (an array-like of
          #   nodes), +name+ and +text+.
          # @return [Set<Symbol>] the technologies found (empty if none).
          def self.perform(root_node)
            technologies = Set.new
            queue = [root_node]

            # Breadth-first traversal: nodes are taken from the front of the
            # queue and their children appended to the back.
            until queue.empty?
              node = queue.shift
              children = node.children

              if children.all? { |child| TEXT_NODE_NAMES.include?(child.name) }
                text = node.text
                PATTERNS.each do |technology, pattern|
                  technologies.add(technology) if pattern.match?(text)
                end
              end

              queue.concat(children)
            end

            technologies
          end
        end
      end
    end
  end
end
|
data/lib/employ_me.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require 'employ_me/parser/location/strategies/pattern_match.rb'
|
|
2
2
|
require 'employ_me/parser/programming_language/strategies/pattern_match.rb'
|
|
3
3
|
require 'employ_me/parser/salary/strategies/pattern_match.rb'
|
|
4
|
+
require 'employ_me/parser/technologies/strategies/pattern_match.rb'
|
|
4
5
|
require 'employ_me/parser/title/strategies/pattern_match.rb'
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: employ_me
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.9
|
|
4
|
+
version: 0.0.10
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- dgonzdev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-11-
|
|
11
|
+
date: 2025-11-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -34,6 +34,7 @@ files:
|
|
|
34
34
|
- lib/employ_me/parser/location/strategies/pattern_match.rb
|
|
35
35
|
- lib/employ_me/parser/programming_language/strategies/pattern_match.rb
|
|
36
36
|
- lib/employ_me/parser/salary/strategies/pattern_match.rb
|
|
37
|
+
- lib/employ_me/parser/technologies/strategies/pattern_match.rb
|
|
37
38
|
- lib/employ_me/parser/title/strategies/pattern_match.rb
|
|
38
39
|
homepage: https://github.com/dgonzdev/employ_me
|
|
39
40
|
licenses: []
|