sitedog_parser 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04e6beaed38d60a1269ed3f20b071f9576eb6c4c40142fbb65fdaa4bd3bac14f
4
- data.tar.gz: acf60abb7739f59e4b5732f95286a4e14f386ceaf1ad7d9b971d1fabc3886a87
3
+ metadata.gz: 753510fc5e4c0a2d565f29d0e5b894f653f83c23cb3354d27a631a74852ea421
4
+ data.tar.gz: 3a2ec09307309a917f4530a937b7443b061c2e671bc5e84a627e487ee7536a3c
5
5
  SHA512:
6
- metadata.gz: 7825cb9106d8861d3084ca9eee9f0c52d0dfbfbc409a0a2320cd89e1487a8ed8334fdf1e1d629ab6b65d25da3e514f4dfdf059be88bdd2bad23f6d08e8ca2b63
7
- data.tar.gz: ccad1201ba56171fd64a9869dfa538bd24ba6cc44feb0fc52f40c17962a6c582dd1589c375226ae21f289dabe67097c70eb92ad807b161ac6e40ff10b3133a55
6
+ metadata.gz: dcb05fa986c51a3a69da4bb812042777e6c878d778215af6466d58aad90f643691e64399a98e17d403e79b5eba607287ed6129323626d69c0bcd8136056893d2
7
+ data.tar.gz: cf4e7360906081835272daeaa168c927d9d2a46787d4328afe22a1db03e82de9167a07280d6e41cdf81eeb26ff2de644494185a07a1a26124a523016ee821b4d
data/bin/sitedog_cli CHANGED
@@ -4,6 +4,7 @@ require 'bundler/setup'
4
4
  require 'sitedog_parser'
5
5
  require 'optparse'
6
6
  require 'logger'
7
+ require 'yaml'
7
8
 
8
9
  # Set default options
9
10
  options = {
@@ -102,7 +103,7 @@ def compact_json_generate(data)
102
103
  formatted_data.each_with_index do |(domain, services), domain_index|
103
104
  output << " \"#{domain}\": {\n"
104
105
 
105
- services.keys.sort.each_with_index do |service_type, service_index|
106
+ services.keys.each_with_index do |service_type, service_index|
106
107
  service_data = services[service_type]
107
108
 
108
109
  # Start line with service type
@@ -132,10 +133,25 @@ end
132
133
  begin
133
134
  logger.debug "Processing file: #{file_path}"
134
135
 
135
- # Convert YAML to JSON
136
+ # Load YAML to check raw data
137
+ raw_yaml = YAML.load_file(file_path)
138
+ if options[:debug]
139
+ logger.debug "Raw YAML data for debug:"
140
+ logger.debug raw_yaml.inspect
141
+ logger.debug ""
142
+ end
143
+
144
+ # Convert YAML to hash
136
145
  data = SitedogParser::Parser.to_hash(file_path, { logger: logger })
137
146
  logger.debug "Data converted to hash"
138
147
 
148
+ # Debug the parsed data
149
+ if options[:debug]
150
+ logger.debug "Parsed data structure:"
151
+ logger.debug data.inspect
152
+ logger.debug ""
153
+ end
154
+
139
155
  # Convert to JSON based on formatting options
140
156
  json_data = if options[:compact_children]
141
157
  logger.debug "Generating JSON with compact inner objects"
data/lib/service.rb CHANGED
@@ -1,11 +1,13 @@
1
- class Service < Data.define(:service, :url, :children, :image_url)
2
- def initialize(service:, url: nil, children: [], image_url: nil)
1
+ class Service < Data.define(:service, :url, :children, :image_url, :properties, :value)
2
+ def initialize(service:, url: nil, children: [], image_url: nil, properties: {}, value: nil)
3
3
  raise ArgumentError, "Service cannot be empty" if service.nil? || service.empty?
4
4
 
5
5
  service => String
6
6
  url => String if url
7
7
  children => Array if children
8
8
  image_url => String if image_url
9
+ properties => Hash if properties
10
+ # value может быть любого типа, поэтому не проверяем
9
11
 
10
12
  super
11
13
  end
@@ -61,6 +61,10 @@ class ServiceFactory
61
61
  in Hash
62
62
  logger.debug "hash: #{data}"
63
63
 
64
+ # Check if all values are URL-like strings
65
+ all_url_like = data.values.all? { |v| v.is_a?(String) && UrlChecker.url_like?(v) }
66
+ logger.debug "All values are URL-like: #{all_url_like}, values: #{data.values.map { |v| "#{v.class}: #{v}" }.join(', ')}"
67
+
64
68
  # Protection from nil values in key fields
65
69
  if (data.key?(:service) || data.key?("service")) &&
66
70
  (data[:service].nil? || data["service"].nil?)
@@ -77,6 +81,8 @@ class ServiceFactory
77
81
  # Первый приоритет - поиск в словаре по URL
78
82
  child_dict_entry = dictionary.match(url_value)
79
83
 
84
+ logger.debug "Child for #{key}: service_name=#{service_name}, url=#{url_value}, dict_entry=#{child_dict_entry}"
85
+
80
86
  if child_dict_entry && child_dict_entry['name']
81
87
  # Если нашли запись в словаре по URL, используем её имя вместо ключа
82
88
  service_name = child_dict_entry['name']
@@ -103,10 +109,45 @@ class ServiceFactory
103
109
 
104
110
  # Create parent service with child elements
105
111
  if service_type && children.any?
112
+ logger.debug "Returning service for #{service_type} with #{children.size} children"
106
113
  return Service.new(service: service_type.to_s, children: children)
107
114
  elsif children.size == 1
108
- # If only one service and no service_type, return it directly
115
+ # If only one service and no service_type, return it
116
+ logger.debug "Returning single child service (no service_type)"
109
117
  return children.first
118
+ else
119
+ logger.debug "Not returning a service for #{data.inspect}, service_type=#{service_type}, children.size=#{children.size}"
120
+ end
121
+ # 1.5 Check if hash contains at least some URL-like strings
122
+ elsif data.values.any? { |v| v.is_a?(String) && UrlChecker.url_like?(v) }
123
+ logger.debug "hash with some URL-like values: #{data.inspect}"
124
+
125
+ # Debug: Check each value for URL-like
126
+ data.each do |k, v|
127
+ if v.is_a?(String)
128
+ logger.debug " Checking #{k}: #{v} - URL-like? #{UrlChecker.url_like?(v)}"
129
+ else
130
+ logger.debug " Skipping non-string #{k}: #{v.class}"
131
+ end
132
+ end
133
+
134
+ # Сохраняем все значения в properties, сохраняя порядок
135
+ properties = {}
136
+ data.each do |key, value|
137
+ properties[key.to_s] = value
138
+ logger.debug "Added property for #{key}: #{value}"
139
+ end
140
+
141
+ # Create service with properties only
142
+ if !properties.empty?
143
+ service = Service.new(
144
+ service: service_type.to_s,
145
+ url: nil,
146
+ properties: properties,
147
+ children: [] # Пустой массив children
148
+ )
149
+ logger.debug "Returning service with #{properties.size} properties"
150
+ return service
110
151
  end
111
152
  end
112
153
 
@@ -225,19 +266,38 @@ class ServiceFactory
225
266
  in Array
226
267
  logger.debug "array: #{data}"
227
268
 
228
- # Create services from array elements
229
- children = data.map { |item| create(item, service_type, dictionary_path) }.compact
269
+ # Create services from all array elements for children
270
+ children = []
271
+ data.each_with_index do |item, index|
272
+ # Для URL-подобных строк используем стандартный механизм
273
+ if item.is_a?(String) && UrlChecker.url_like?(item)
274
+ child_service = create(item, service_type, dictionary_path, options)
275
+ children << child_service if child_service
276
+ else
277
+ # Для простых значений создаем сервис с value
278
+ child_service = Service.new(
279
+ service: service_type ? service_type.to_s : "value",
280
+ url: nil,
281
+ properties: {},
282
+ value: item # Используем поле value
283
+ )
284
+ children << child_service
285
+ logger.debug "Created service with value for item #{index}: #{item.inspect}"
286
+ end
287
+ end
230
288
 
231
- # If there are child services, create a parent service with them
232
- if children.any? && service_type
233
- return Service.new(service: service_type.to_s, children: children)
234
- elsif children.size == 1
235
- # If only one child service, return it
236
- return children.first
289
+ # Return service with all items as children
290
+ if service_type
291
+ result = Service.new(
292
+ service: service_type.to_s,
293
+ url: nil,
294
+ children: children
295
+ )
296
+ logger.debug "Returning array service with #{children.size} children"
297
+ return result
237
298
  end
238
299
 
239
- # If no child services or no name for parent service,
240
- # return nil
300
+ # Fallback to nil if no service_type
241
301
  return nil
242
302
  else
243
303
  # Handle values that don't match any pattern
@@ -1,3 +1,3 @@
1
1
  module SitedogParser
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -68,9 +68,16 @@ module SitedogParser
68
68
  # Для обычных полей создаем сервис
69
69
  service = ServiceFactory.create(data, service_type, dictionary_path, options)
70
70
 
71
+ # Debug output
72
+ if logger
73
+ logger.debug "ServiceFactory.create for #{service_type}: #{service.inspect}"
74
+ end
75
+
71
76
  if service
72
77
  services[service_type] ||= []
73
78
  services[service_type] << service
79
+ elsif logger
80
+ logger.debug "Service for #{service_type} is nil, field will be skipped"
74
81
  end
75
82
  end
76
83
  end
@@ -103,11 +110,49 @@ module SitedogParser
103
110
  if service_data.is_a?(Array) && service_data.first.is_a?(Service)
104
111
  # Преобразуем массив сервисов в массив хешей
105
112
  result[domain_key][service_type_key] = service_data.map do |service|
106
- {
113
+ service_hash = {
107
114
  'service' => service.service,
108
- 'url' => service.url,
109
- 'children' => service.children.map { |child| {'service' => child.service, 'url' => child.url} }
115
+ 'url' => service.url
110
116
  }
117
+
118
+ # Добавляем image_url если он есть
119
+ if service.image_url
120
+ service_hash['image_url'] = service.image_url
121
+ end
122
+
123
+ # Добавляем children только если они есть
124
+ if service.children && !service.children.empty?
125
+ service_hash['children'] = service.children.map do |child|
126
+ child_hash = {
127
+ 'service' => child.service,
128
+ 'url' => child.url
129
+ }
130
+
131
+ # Добавляем image_url для детей если он есть
132
+ if child.image_url
133
+ child_hash['image_url'] = child.image_url
134
+ end
135
+
136
+ # Добавляем properties для children если они есть
137
+ if child.properties && !child.properties.empty?
138
+ child_hash['properties'] = child.properties
139
+ end
140
+
141
+ # Добавляем value для children если оно есть
142
+ if child.value
143
+ child_hash['value'] = child.value
144
+ end
145
+
146
+ child_hash
147
+ end
148
+ end
149
+
150
+ # Добавляем properties, если они есть
151
+ if service.properties && !service.properties.empty?
152
+ service_hash['properties'] = service.properties
153
+ end
154
+
155
+ service_hash
111
156
  end
112
157
  else
113
158
  # Сохраняем простые поля как есть
data/lib/url_checker.rb CHANGED
@@ -28,7 +28,7 @@ module UrlChecker
28
28
  end
29
29
 
30
30
  # Check for standard URLs
31
- pattern = /^((?:https?|ftp|sftp|ftps|ssh|git|ws|wss):\/\/)?[a-zA-Z0-9][-a-zA-Z0-9.]+\.[a-zA-Z]{2,}(:[0-9]+)?(\/[-a-zA-Z0-9%_.~#+]*)*(\?[-a-zA-Z0-9%_&=.~#+]*)?(#[-a-zA-Z0-9%_&=.~#+\/]*)?$/
31
+ pattern = /^((?:https?|ftp|sftp|ftps|ssh|git|ws|wss):\/\/)?((?:[a-zA-Z0-9][-a-zA-Z0-9.]+\.[a-zA-Z]{2,})|(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))(:[0-9]+)?(\/[-a-zA-Z0-9%_.~#+]*)*(\?[-a-zA-Z0-9%_&=.~#+]*)?(#[-a-zA-Z0-9%_&=.~#+\/]*)?$/
32
32
 
33
33
  !!string.match(pattern)
34
34
  end
@@ -61,6 +61,11 @@ module UrlChecker
61
61
  # Remove protocol and www prefix if present
62
62
  domain = url.gsub(%r{^(?:https?://)?(?:www\.)?}, "")
63
63
 
64
+ # Check if it's an IP address
65
+ if domain.match?(/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/)
66
+ return "IP Address"
67
+ end
68
+
64
69
  # Extract domain from URL by removing everything after first / or : or ? or #
65
70
  domain = domain.split(/[:\/?#]/).first
66
71
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitedog_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ivan Nemytchenko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-05-07 00:00:00.000000000 Z
11
+ date: 2025-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler