sitedog_parser 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +46 -224
- data/bin/sitedog_cli +172 -0
- data/lib/service_factory.rb +15 -9
- data/lib/sitedog_parser/version.rb +1 -1
- data/lib/sitedog_parser.rb +48 -38
- metadata +18 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04e6beaed38d60a1269ed3f20b071f9576eb6c4c40142fbb65fdaa4bd3bac14f
|
4
|
+
data.tar.gz: acf60abb7739f59e4b5732f95286a4e14f386ceaf1ad7d9b971d1fabc3886a87
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7825cb9106d8861d3084ca9eee9f0c52d0dfbfbc409a0a2320cd89e1487a8ed8334fdf1e1d629ab6b65d25da3e514f4dfdf059be88bdd2bad23f6d08e8ca2b63
|
7
|
+
data.tar.gz: ccad1201ba56171fd64a9869dfa538bd24ba6cc44feb0fc52f40c17962a6c582dd1589c375226ae21f289dabe67097c70eb92ad807b161ac6e40ff10b3133a55
|
data/README.md
CHANGED
@@ -4,29 +4,13 @@ A library for parsing and classifying web services from YAML files into structur
|
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
7
|
```ruby
|
10
8
|
gem 'sitedog_parser'
|
11
9
|
```
|
12
10
|
|
13
|
-
Then execute:
|
14
|
-
|
15
|
-
```bash
|
16
|
-
$ bundle install
|
17
|
-
```
|
18
|
-
|
19
|
-
Or install it yourself:
|
20
|
-
|
21
|
-
```bash
|
22
|
-
$ gem install sitedog_parser
|
23
|
-
```
|
24
|
-
|
25
11
|
## Usage
|
26
12
|
|
27
|
-
###
|
28
|
-
|
29
|
-
The easiest way to use SitedogParser is through its high-level interface:
|
13
|
+
### Basic Usage
|
30
14
|
|
31
15
|
```ruby
|
32
16
|
require 'sitedog_parser'
|
@@ -34,256 +18,94 @@ require 'sitedog_parser'
|
|
34
18
|
# Parse from a YAML file
|
35
19
|
parsed_data = SitedogParser::Parser.parse_file('data.yml')
|
36
20
|
|
37
|
-
# Or parse from a hash (if you already loaded the YAML)
|
38
|
-
yaml_data = YAML.load_file('data.yml', symbolize_names: true)
|
39
|
-
parsed_data = SitedogParser::Parser.parse(yaml_data)
|
40
|
-
|
41
|
-
# Get all services of a specific type across all domains
|
42
|
-
all_hosting_services = SitedogParser::Parser.get_services_by_type(parsed_data, :hosting)
|
43
|
-
all_hosting_services.each do |service|
|
44
|
-
puts "Hosting service: #{service.service}, URL: #{service.url}"
|
45
|
-
end
|
46
|
-
|
47
|
-
# Get all domain names
|
48
|
-
domain_names = SitedogParser::Parser.get_domain_names(parsed_data)
|
49
|
-
puts "Found domains: #{domain_names.join(', ')}"
|
50
|
-
|
51
21
|
# Working with specific domain's services
|
52
22
|
domain_services = parsed_data['example.com']
|
53
|
-
if domain_services[:
|
54
|
-
puts "
|
23
|
+
if domain_services[:hosting]
|
24
|
+
puts "Hosting: #{domain_services[:hosting].first.service}"
|
25
|
+
puts "URL: #{domain_services[:hosting].first.url}"
|
55
26
|
end
|
56
27
|
```
|
57
28
|
|
58
|
-
###
|
29
|
+
### Simple Fields
|
59
30
|
|
60
|
-
You can specify which fields should be treated as simple
|
31
|
+
You can specify which fields should be treated as simple values:
|
61
32
|
|
62
33
|
```ruby
|
63
|
-
# Define
|
34
|
+
# Define simple fields
|
64
35
|
simple_fields = [:project, :role, :environment, :registry]
|
65
36
|
|
66
37
|
# Parse with simple fields
|
67
38
|
parsed_data = SitedogParser::Parser.parse(yaml_data, simple_fields: simple_fields)
|
68
39
|
|
69
|
-
# Now you can access these fields directly as strings
|
70
|
-
domain_services = parsed_data['example.com']
|
71
|
-
if domain_services[:project]
|
72
|
-
puts "Project: #{domain_services[:project]}" # This is a string, not a Service object
|
73
|
-
end
|
74
|
-
|
75
40
|
# Find domains with a specific field value
|
76
|
-
|
77
|
-
puts "Production domains: #{domains_with_production.join(', ')}"
|
41
|
+
production_domains = parsed_data.select { |_domain, services| services[:environment] == 'production' }.keys
|
78
42
|
```
|
79
43
|
|
80
|
-
###
|
81
|
-
|
82
|
-
You can use the DictionaryAnalyzer to find services that might be missing from your dictionary:
|
44
|
+
### Export to JSON
|
83
45
|
|
84
46
|
```ruby
|
85
|
-
|
86
|
-
|
47
|
+
# Standard output
|
48
|
+
json_data = SitedogParser::Parser.to_json('services.yml')
|
87
49
|
|
88
|
-
#
|
89
|
-
|
50
|
+
# Or via command line:
|
51
|
+
# $ sitedog_cli services.yml > services.json
|
90
52
|
|
91
|
-
#
|
92
|
-
|
93
|
-
|
94
|
-
# Generate a report
|
95
|
-
report = SitedogParser::DictionaryAnalyzer.report(parsed_data)
|
96
|
-
puts report
|
97
|
-
|
98
|
-
# Or use the provided script
|
99
|
-
# bin/analyze_dictionary data.yml
|
53
|
+
# Compact JSON for inner objects
|
54
|
+
# $ sitedog_cli -C services.yml > services.json
|
100
55
|
```
|
101
56
|
|
102
|
-
|
103
|
-
1. A list of services that are missing from the dictionary
|
104
|
-
2. How many domains use each service
|
105
|
-
3. In which context (service type) each service is used
|
106
|
-
4. A YAML template ready to be added to your dictionary
|
107
|
-
|
108
|
-
### Example: Processing a YAML Configuration
|
109
|
-
|
110
|
-
Input YAML file (`services.yml`):
|
111
|
-
|
112
|
-
```yaml
|
113
|
-
example.com:
|
114
|
-
hosting: https://aws.amazon.com
|
115
|
-
dns:
|
116
|
-
service: cloudflare
|
117
|
-
url: https://cloudflare.com
|
118
|
-
registrar: namecheap
|
119
|
-
ssl: letsencrypt
|
120
|
-
repo: https://github.com/example/repo
|
121
|
-
|
122
|
-
another-site.org:
|
123
|
-
hosting:
|
124
|
-
service: digitalocean
|
125
|
-
url: https://digitalocean.com
|
126
|
-
cdn: https://cloudfront.aws.amazon.com
|
127
|
-
dns: https://domains.google.com
|
128
|
-
```
|
129
|
-
|
130
|
-
Processing this file:
|
131
|
-
|
132
|
-
```ruby
|
133
|
-
require 'sitedog_parser'
|
134
|
-
|
135
|
-
# Parse the file
|
136
|
-
data = SitedogParser::Parser.parse_file('services.yml')
|
137
|
-
|
138
|
-
# Get all domains
|
139
|
-
puts "Domains: #{SitedogParser::Parser.get_domain_names(data).join(', ')}"
|
57
|
+
### JSON Structure Example
|
140
58
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
dns_services = SitedogParser::Parser.get_services_by_type(data, :dns)
|
150
|
-
puts "\nDNS services:"
|
151
|
-
dns_services.each do |service|
|
152
|
-
puts "- #{service.service}: #{service.url}"
|
153
|
-
end
|
154
|
-
|
155
|
-
# Access a specific domain's services
|
156
|
-
puts "\nServices for example.com:"
|
157
|
-
example_services = data['example.com']
|
158
|
-
example_services.each do |type, services|
|
159
|
-
puts "#{type}: #{services.first.service}"
|
160
|
-
end
|
161
|
-
```
|
162
|
-
|
163
|
-
Output:
|
164
|
-
```
|
165
|
-
Domains: example.com, another-site.org
|
166
|
-
|
167
|
-
Hosting services:
|
168
|
-
- Amazon Web Services: https://aws.amazon.com
|
169
|
-
- Digitalocean: https://digitalocean.com
|
170
|
-
|
171
|
-
DNS services:
|
172
|
-
- Cloudflare: https://cloudflare.com
|
173
|
-
- Domains: https://domains.google.com
|
174
|
-
|
175
|
-
Services for example.com:
|
176
|
-
hosting: Amazon Web Services
|
177
|
-
dns: Cloudflare
|
178
|
-
registrar: Namecheap
|
179
|
-
ssl: Letsencrypt
|
180
|
-
repo: Github
|
59
|
+
```json
|
60
|
+
{
|
61
|
+
"example.com": {
|
62
|
+
"hosting": [{"service":"Amazon Web Services","url":"https://aws.amazon.com"}],
|
63
|
+
"dns": [{"service":"Cloudflare","url":"https://cloudflare.com"}],
|
64
|
+
"registrar": [{"service":"Namecheap","url":"https://namecheap.com"}]
|
65
|
+
}
|
66
|
+
}
|
181
67
|
```
|
182
68
|
|
183
|
-
### Service Object
|
184
|
-
|
185
|
-
Each service object has the following structure:
|
69
|
+
### Service Object
|
186
70
|
|
187
71
|
```ruby
|
188
|
-
# Service fields
|
189
72
|
service.service # Name of the service (capitalized string)
|
190
73
|
service.url # URL of the service (string or nil)
|
191
74
|
service.children # Child services (array of Service objects, empty if none)
|
192
75
|
```
|
193
76
|
|
194
|
-
###
|
195
|
-
|
196
|
-
SitedogParser's strength is in normalizing different data formats into a consistent structure. Here are examples showing how various input formats are handled:
|
197
|
-
|
198
|
-
#### 1. Simple URL string
|
199
|
-
```ruby
|
200
|
-
# Input
|
201
|
-
data = "https://github.com/username/repo"
|
77
|
+
### Supported Data Formats
|
202
78
|
|
203
|
-
|
204
|
-
service = ServiceFactory.create(data)
|
205
|
-
service.service # => "Github"
|
206
|
-
service.url # => "https://github.com"
|
207
|
-
service.children # => []
|
208
|
-
```
|
79
|
+
The library handles various data formats:
|
209
80
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
81
|
+
1. **URL strings**: `"https://github.com/username/repo"` → GitHub service
|
82
|
+
2. **Service names**: `"GitHub"` → GitHub service with URL
|
83
|
+
3. **Hashes with service and URL**: `{service: "Github", url: "https://github.com/repo"}`
|
84
|
+
4. **Nested hashes** with service types
|
85
|
+
5. **Hashes with URLs** as values
|
214
86
|
|
215
|
-
|
216
|
-
service = ServiceFactory.create(data)
|
217
|
-
service.service # => "GitHub"
|
218
|
-
service.url # => "https://github.com"
|
219
|
-
service.children # => []
|
220
|
-
```
|
87
|
+
### Dictionary Analysis
|
221
88
|
|
222
|
-
#### 3. Hash with service and URL
|
223
89
|
```ruby
|
224
|
-
#
|
225
|
-
|
226
|
-
service: "Github",
|
227
|
-
url: "https://github.com/username/repo"
|
228
|
-
}
|
90
|
+
# Find candidates for the dictionary (services with name but no URL)
|
91
|
+
candidates = SitedogParser::DictionaryAnalyzer.find_dictionary_candidates(parsed_data)
|
229
92
|
|
230
|
-
#
|
231
|
-
|
232
|
-
service.service # => "Github"
|
233
|
-
service.url # => "https://github.com/username/repo"
|
234
|
-
service.children # => []
|
93
|
+
# Generate a report
|
94
|
+
report = SitedogParser::DictionaryAnalyzer.report(parsed_data)
|
235
95
|
```
|
236
96
|
|
237
|
-
|
238
|
-
```ruby
|
239
|
-
# Input
|
240
|
-
data = {
|
241
|
-
dns: {
|
242
|
-
service: "route53",
|
243
|
-
url: "https://console.aws.amazon.com/route53"
|
244
|
-
},
|
245
|
-
registrar: {
|
246
|
-
service: "namecheap",
|
247
|
-
url: "https://namecheap.com"
|
248
|
-
}
|
249
|
-
}
|
97
|
+
### Command Line Options
|
250
98
|
|
251
|
-
# Output
|
252
|
-
service = ServiceFactory.create(data)
|
253
|
-
service.service # => "Unknown"
|
254
|
-
service.children.size # => 2
|
255
|
-
service.children[0].service # => "Route53"
|
256
|
-
service.children[0].url # => "https://console.aws.amazon.com/route53"
|
257
|
-
service.children[1].service # => "Namecheap"
|
258
|
-
service.children[1].url # => "https://namecheap.com"
|
259
99
|
```
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
}
|
268
|
-
|
269
|
-
# Output
|
270
|
-
service = ServiceFactory.create(data)
|
271
|
-
service.service # => "Unknown"
|
272
|
-
service.children.size # => 2
|
273
|
-
service.children[0].service # => "Hosting"
|
274
|
-
service.children[0].url # => "https://aws.amazon.com"
|
275
|
-
service.children[1].service # => "Cdn"
|
276
|
-
service.children[1].url # => "https://cloudflare.com"
|
100
|
+
$ sitedog_cli --help
|
101
|
+
Usage: sitedog_cli [options] <path_to_yaml_file> [output_file]
|
102
|
+
-d, --debug Enable debug output
|
103
|
+
-c, --compact Output compact JSON (without pretty formatting)
|
104
|
+
-C, --compact-children Format JSON with compact inner objects (one line per service)
|
105
|
+
-q, --quiet Suppress non-error messages
|
106
|
+
-h, --help Show this help message
|
277
107
|
```
|
278
108
|
|
279
|
-
## Development and Contribution
|
280
|
-
|
281
|
-
1. Fork the repository
|
282
|
-
2. Create a branch for your changes (`git checkout -b my-new-feature`)
|
283
|
-
3. Commit your changes (`git commit -am 'Add new feature'`)
|
284
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
285
|
-
5. Create a Pull Request
|
286
|
-
|
287
109
|
## License
|
288
110
|
|
289
|
-
|
111
|
+
MIT
|
data/bin/sitedog_cli
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'sitedog_parser'
|
5
|
+
require 'optparse'
|
6
|
+
require 'logger'
|
7
|
+
|
8
|
+
# Default settings; the command-line flags below override individual entries.
options = {
  debug: false,
  pretty: true,
  compact_children: false,
  output: nil,
  log_level: Logger::INFO
}

# Log to STDOUT, printing only the bare message (no severity or timestamp).
logger = Logger.new(STDOUT)
logger.formatter = proc { |_severity, _datetime, _progname, msg| "#{msg}\n" }

# Command line options parser
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: sitedog_cli [options] <path_to_yaml_file> [output_file]"

  opts.on("-d", "--debug", "Enable debug output") do
    options[:debug] = true
    options[:log_level] = Logger::DEBUG
  end

  opts.on("-c", "--compact", "Output compact JSON (without pretty formatting)") do
    options[:pretty] = false
  end

  opts.on("--compact-children", "-C", "Format JSON with compact inner objects (one line per service)") do
    options[:compact_children] = true
  end

  opts.on("-q", "--quiet", "Suppress non-error messages") do
    options[:log_level] = Logger::ERROR
  end

  opts.on("-h", "--help", "Show this help message") do
    puts opts
    exit
  end
end

# Parse options; recognised flags are removed from ARGV, leaving the paths.
# An unknown or malformed flag is reported with the usage text instead of
# terminating the script with an unhandled exception.
begin
  option_parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  logger.error e.message
  logger.error option_parser.help
  exit 1
end

# Set logging level
logger.level = options[:log_level]

# Exactly one input path is required; a second argument is the output path.
unless (1..2).cover?(ARGV.size)
  logger.error option_parser.help
  exit 1
end

file_path = ARGV[0]
output_path = ARGV[1]

# Check if input file exists
unless File.exist?(file_path)
  logger.error "Error: File '#{file_path}' not found."
  exit 1
end

# Outside debug mode, discard everything written to STDERR while the file is
# processed. A duplicate of the original stream is kept so it can be restored.
unless options[:debug]
  original_stderr = $stderr.dup
  # Reopen by path so no intermediate File object is left open.
  $stderr.reopen(File::NULL, 'w')
end
|
76
|
+
|
77
|
+
# Render parsed data as JSON with one line per service type.
#
# The top two levels (domains, then service types sorted by name) are
# indented; every value below that is emitted in compact single-line form.
# Empty "children" entries are omitted from service hashes. The input is
# not modified.
#
# @param data [Hash] domain => { service type => value } mapping
# @return [String] JSON document
def compact_json_generate(data)
  entries = data.map do |domain, services|
    # Keys go through the JSON encoder so quotes/backslashes are escaped.
    "  #{domain.to_s.to_json}: #{compact_domain_json(services)}"
  end
  return "{\n}" if entries.empty?

  "{\n#{entries.join(",\n")}\n}"
end

# Render one domain's services as an indented JSON object, one type per line.
def compact_domain_json(services)
  # Anything that is not a mapping is emitted as a plain JSON value.
  return JSON.generate(services) unless services.is_a?(Hash)

  # sort_by(&:to_s) keeps the original ordering while tolerating a mix of
  # Symbol and String keys, which a plain sort cannot compare.
  lines = services.keys.sort_by(&:to_s).map do |service_type|
    "    #{service_type.to_s.to_json}: #{compact_value_json(services[service_type])}"
  end
  body = lines.empty? ? '' : "#{lines.join(",\n")}\n"
  "{\n#{body}  }"
end

# Render a service-type value on a single line, dropping empty "children".
def compact_value_json(value)
  return JSON.generate(value) unless value.is_a?(Array)

  items = value.map do |item|
    # Only hashes can carry a "children" entry; other items pass through.
    next JSON.generate(item) unless item.is_a?(Hash)

    trimmed = item.reject do |key, val|
      key.to_s == 'children' && val.respond_to?(:empty?) && val.empty?
    end
    JSON.generate(trimmed)
  end
  "[#{items.join(',')}]"
end
|
131
|
+
|
132
|
+
# Main flow: convert the YAML input to a hash, render it as JSON in the
# requested layout, then write it to the output file or print it.
begin
  logger.debug "Processing file: #{file_path}"

  # Parse the YAML file into plain hashes
  data = SitedogParser::Parser.to_hash(file_path, { logger: logger })
  logger.debug "Data converted to hash"

  # --compact-children is checked first, then the pretty/compact switch.
  json_data =
    if options[:compact_children]
      logger.debug "Generating JSON with compact inner objects"
      compact_json_generate(data)
    elsif options[:pretty]
      logger.debug "Generating pretty JSON"
      JSON.pretty_generate(data)
    else
      logger.debug "Generating compact JSON"
      JSON.generate(data)
    end

  if output_path.nil?
    # No output file given: print the JSON itself, bypassing the logger.
    puts json_data
  else
    logger.debug "Saving to file: #{output_path}"
    File.write(output_path, json_data)
    logger.info "JSON data successfully saved to '#{output_path}'."
  end
rescue => error
  # STDERR was silenced outside debug mode; bring it back before reporting.
  $stderr.reopen(original_stderr) unless options[:debug]

  logger.error "Error processing file: #{error.message}"
  logger.debug error.backtrace.join("\n") if options[:debug]
  exit 1
ensure
  # Always leave STDERR pointing at the original stream.
  $stderr.reopen(original_stderr) unless options[:debug]
end
|
data/lib/service_factory.rb
CHANGED
@@ -2,6 +2,7 @@ require 'pry'
|
|
2
2
|
require_relative 'url_checker'
|
3
3
|
require_relative 'dictionary'
|
4
4
|
require_relative 'service'
|
5
|
+
require 'logger'
|
5
6
|
|
6
7
|
# Factory for creating Service objects from different data formats
|
7
8
|
class ServiceFactory
|
@@ -10,8 +11,13 @@ class ServiceFactory
|
|
10
11
|
# @param data [String, Hash, Array] data for creating service
|
11
12
|
# @param service_type [Symbol] service type (used as fallback)
|
12
13
|
# @param dictionary_path [String, nil] path to the dictionary file (optional)
|
14
|
+
# @param options [Hash] additional options
|
15
|
+
# @option options [Logger] :logger logger for output messages
|
13
16
|
# @return [Service] created service object
|
14
|
-
def self.create(data, service_type = nil, dictionary_path = nil)
|
17
|
+
def self.create(data, service_type = nil, dictionary_path = nil, options = {})
|
18
|
+
# Take the logger from the options, or create a null logger that discards output
|
19
|
+
logger = options[:logger] || Logger.new(nil)
|
20
|
+
|
15
21
|
# Check for nil
|
16
22
|
return nil if data.nil?
|
17
23
|
|
@@ -34,7 +40,7 @@ class ServiceFactory
|
|
34
40
|
slug = UrlChecker.extract_name(url) if slug.nil?
|
35
41
|
end
|
36
42
|
|
37
|
-
|
43
|
+
logger.debug "url: #{slug} <- #{url}"
|
38
44
|
in String if !UrlChecker.url_like?(data) # slug
|
39
45
|
slug = data
|
40
46
|
dict_entry = dictionary.lookup(slug)
|
@@ -45,15 +51,15 @@ class ServiceFactory
|
|
45
51
|
end
|
46
52
|
|
47
53
|
url = dict_entry&.dig('url')
|
48
|
-
|
54
|
+
logger.debug "slug: #{slug} -> #{url}"
|
49
55
|
in { service: String => service_slug, url: String => service_url }
|
50
56
|
slug = service_slug.to_s.capitalize
|
51
57
|
url = service_url
|
52
58
|
# Look up the dictionary once the slug is known
|
53
59
|
dict_entry = dictionary.lookup(slug)
|
54
|
-
|
60
|
+
logger.debug "hash: #{slug} + #{url}"
|
55
61
|
in Hash
|
56
|
-
|
62
|
+
logger.debug "hash: #{data}"
|
57
63
|
|
58
64
|
# Protection from nil values in key fields
|
59
65
|
if (data.key?(:service) || data.key?("service")) &&
|
@@ -63,7 +69,7 @@ class ServiceFactory
|
|
63
69
|
|
64
70
|
# 1. Check if hash contains only URL-like strings (list of services)
|
65
71
|
if data.values.all? { |v| v.is_a?(String) && UrlChecker.url_like?(v) }
|
66
|
-
|
72
|
+
logger.debug "hash with services: #{data.keys.join(', ')}"
|
67
73
|
# Create array of child services
|
68
74
|
children = []
|
69
75
|
data.each do |key, url_value|
|
@@ -217,7 +223,7 @@ class ServiceFactory
|
|
217
223
|
return Service.new(service: "Unknown", children: children)
|
218
224
|
end
|
219
225
|
in Array
|
220
|
-
|
226
|
+
logger.debug "array: #{data}"
|
221
227
|
|
222
228
|
# Create services from array elements
|
223
229
|
children = data.map { |item| create(item, service_type, dictionary_path) }.compact
|
@@ -249,8 +255,8 @@ class ServiceFactory
|
|
249
255
|
nil
|
250
256
|
end
|
251
257
|
rescue => e
|
252
|
-
|
253
|
-
|
258
|
+
logger.error "Error creating service: #{e.message}"
|
259
|
+
logger.error "Data: #{data.inspect}"
|
254
260
|
return nil
|
255
261
|
end
|
256
262
|
end
|
data/lib/sitedog_parser.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "sitedog_parser/version"
|
2
2
|
require 'yaml'
|
3
3
|
require 'date'
|
4
|
+
require 'json'
|
4
5
|
|
5
6
|
require_relative "service"
|
6
7
|
require_relative "dictionary"
|
@@ -21,10 +22,12 @@ module SitedogParser
|
|
21
22
|
# @param symbolize_names [Boolean] whether to symbolize keys in the YAML file
|
22
23
|
# @param simple_fields [Array<Symbol>] fields that should remain as simple strings without service wrapping
|
23
24
|
# @param dictionary_path [String, nil] path to the dictionary file (optional)
|
25
|
+
# @param options [Hash] additional options
|
26
|
+
# @option options [Logger] :logger logger for output messages
|
24
27
|
# @return [Hash] hash containing parsed services by type and domain
|
25
|
-
def self.parse_file(file_path, symbolize_names: true, simple_fields: DEFAULT_SIMPLE_FIELDS, dictionary_path: nil)
|
28
|
+
def self.parse_file(file_path, symbolize_names: true, simple_fields: DEFAULT_SIMPLE_FIELDS, dictionary_path: nil, options: {})
|
26
29
|
yaml = YAML.load_file(file_path, symbolize_names: symbolize_names)
|
27
|
-
parse(yaml, simple_fields: simple_fields, dictionary_path: dictionary_path)
|
30
|
+
parse(yaml, simple_fields: simple_fields, dictionary_path: dictionary_path, options: options)
|
28
31
|
end
|
29
32
|
|
30
33
|
# Parse YAML data and convert it to structured Ruby objects
|
@@ -32,9 +35,12 @@ module SitedogParser
|
|
32
35
|
# @param yaml [Hash] YAML data as a hash
|
33
36
|
# @param simple_fields [Array<Symbol>] fields that should remain as simple strings without service wrapping
|
34
37
|
# @param dictionary_path [String, nil] path to the dictionary file (optional)
|
38
|
+
# @param options [Hash] additional options
|
39
|
+
# @option options [Logger] :logger logger for output messages
|
35
40
|
# @return [Hash] hash containing parsed services by type and domain
|
36
|
-
def self.parse(yaml, simple_fields: DEFAULT_SIMPLE_FIELDS, dictionary_path: nil)
|
41
|
+
def self.parse(yaml, simple_fields: DEFAULT_SIMPLE_FIELDS, dictionary_path: nil, options: {})
|
37
42
|
result = {}
|
43
|
+
logger = options[:logger]
|
38
44
|
|
39
45
|
yaml.each do |domain_name, items|
|
40
46
|
services = {}
|
@@ -60,7 +66,7 @@ module SitedogParser
|
|
60
66
|
end
|
61
67
|
else
|
62
68
|
# For regular fields, create a service
|
63
|
-
service = ServiceFactory.create(data, service_type, dictionary_path)
|
69
|
+
service = ServiceFactory.create(data, service_type, dictionary_path, options)
|
64
70
|
|
65
71
|
if service
|
66
72
|
services[service_type] ||= []
|
@@ -76,47 +82,51 @@ module SitedogParser
|
|
76
82
|
result
|
77
83
|
end
|
78
84
|
|
79
|
-
#
|
80
|
-
#
|
81
|
-
# @param
|
82
|
-
# @
|
83
|
-
# @return [
|
84
|
-
def self.
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
85
|
+
# Parse a YAML file and return a plain hash in which every Service object
# has been replaced by a hash of its fields.
#
# Domain names and service types become symbol keys. A value that is an
# array starting with a Service is converted element by element into
# { 'service', 'url', 'children' } hashes; any other value is kept as is.
# Children are converted recursively, and a child's own 'children' key is
# only present when it has nested children.
#
# @param file_path [String] path to the YAML file
# @param options [Hash] additional options
# @option options [Logger] :logger logger passed through to the parser
# @return [Hash] services grouped by domain and then by service type
def self.to_hash(file_path, options = {})
  # Recursive converter, so children of children are not silently dropped.
  serialize = lambda do |service, nested|
    children = Array(service.children).map { |child| serialize.call(child, true) }
    fields = { 'service' => service.service, 'url' => service.url }
    # Top-level services always expose 'children'; nested ones only if any.
    fields['children'] = children unless nested && children.empty?
    fields
  end

  parse_file(file_path, options: options).each_with_object({}) do |(domain, services), result|
    result[domain.to_sym] = services.each_with_object({}) do |(service_type, service_data), converted|
      converted[service_type.to_sym] =
        if service_data.is_a?(Array) && service_data.first.is_a?(Service)
          service_data.map { |service| serialize.call(service, false) }
        else
          # Simple fields are stored unchanged.
          service_data
        end
    end
  end
end
|
95
121
|
|
96
|
-
#
|
122
|
+
# Converts the data from a YAML file to JSON format
|
97
123
|
#
|
98
|
-
# @param
|
99
|
-
# @
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
# Get domains with a specific simple field value
|
105
|
-
#
|
106
|
-
# @param parsed_data [Hash] data returned by parse or parse_file
|
107
|
-
# @param field [Symbol] simple field to filter by
|
108
|
-
# @param value [String] value to match
|
109
|
-
# @return [Array] array of domain names that have the specified field value
|
110
|
-
def self.get_domains_by_field_value(parsed_data, field, value)
|
111
|
-
result = []
|
112
|
-
|
113
|
-
parsed_data.each do |domain_name, services|
|
114
|
-
if services[field] == value
|
115
|
-
result << domain_name
|
116
|
-
end
|
117
|
-
end
|
118
|
-
|
119
|
-
result
|
124
|
+
# @param file_path [String] path to the YAML file
# @param options [Hash] additional options
# @option options [Logger] :logger logger for diagnostic messages
# @return [String] pretty-printed JSON string
def self.to_json(file_path, options = {})
  services_hash = to_hash(file_path, options)
  JSON.pretty_generate(services_hash)
end
|
121
131
|
end
|
122
132
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitedog_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ivan Nemytchenko
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-05-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.10.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: minitest-power_assert
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: thor
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -99,13 +113,14 @@ description: A library for parsing and classifying web services, hosting, and do
|
|
99
113
|
email:
|
100
114
|
- nemytchenko@gmail.com
|
101
115
|
executables:
|
102
|
-
-
|
116
|
+
- sitedog_cli
|
103
117
|
extensions: []
|
104
118
|
extra_rdoc_files: []
|
105
119
|
files:
|
106
120
|
- CHANGELOG.md
|
107
121
|
- README.md
|
108
122
|
- bin/analyze_dictionary
|
123
|
+
- bin/sitedog_cli
|
109
124
|
- lib/data_structures.rb
|
110
125
|
- lib/dictionary.rb
|
111
126
|
- lib/dictionary_analyzer.rb
|