png_conform 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +116 -6
- data/Gemfile +1 -1
- data/config/validation_profiles.yml +105 -0
- data/lib/png_conform/analyzers/comparison_analyzer.rb +41 -7
- data/lib/png_conform/analyzers/metrics_analyzer.rb +6 -9
- data/lib/png_conform/analyzers/optimization_analyzer.rb +30 -24
- data/lib/png_conform/analyzers/resolution_analyzer.rb +31 -32
- data/lib/png_conform/cli.rb +12 -0
- data/lib/png_conform/commands/check_command.rb +118 -53
- data/lib/png_conform/configuration.rb +147 -0
- data/lib/png_conform/container.rb +113 -0
- data/lib/png_conform/models/validation_result.rb +30 -4
- data/lib/png_conform/pipelines/pipeline_result.rb +39 -0
- data/lib/png_conform/pipelines/stages/analysis_stage.rb +35 -0
- data/lib/png_conform/pipelines/stages/base_stage.rb +23 -0
- data/lib/png_conform/pipelines/stages/chunk_validation_stage.rb +74 -0
- data/lib/png_conform/pipelines/stages/sequence_validation_stage.rb +77 -0
- data/lib/png_conform/pipelines/stages/signature_validation_stage.rb +41 -0
- data/lib/png_conform/pipelines/validation_pipeline.rb +90 -0
- data/lib/png_conform/readers/full_load_reader.rb +13 -4
- data/lib/png_conform/readers/streaming_reader.rb +27 -2
- data/lib/png_conform/reporters/color_reporter.rb +17 -14
- data/lib/png_conform/reporters/visual_elements.rb +22 -16
- data/lib/png_conform/services/analysis_manager.rb +120 -0
- data/lib/png_conform/services/chunk_processor.rb +195 -0
- data/lib/png_conform/services/file_signature.rb +226 -0
- data/lib/png_conform/services/file_strategy.rb +78 -0
- data/lib/png_conform/services/lru_cache.rb +170 -0
- data/lib/png_conform/services/parallel_validator.rb +118 -0
- data/lib/png_conform/services/profile_manager.rb +41 -12
- data/lib/png_conform/services/result_builder.rb +299 -0
- data/lib/png_conform/services/validation_cache.rb +210 -0
- data/lib/png_conform/services/validation_orchestrator.rb +188 -0
- data/lib/png_conform/services/validation_service.rb +53 -337
- data/lib/png_conform/services/validator_pool.rb +142 -0
- data/lib/png_conform/utils/colorizer.rb +149 -0
- data/lib/png_conform/validators/chunk_registry.rb +12 -0
- data/lib/png_conform/validators/streaming_idat_validator.rb +123 -0
- data/lib/png_conform/version.rb +1 -1
- data/png_conform.gemspec +1 -0
- metadata +38 -2
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../validators/chunk_registry"
|
|
4
|
+
require_relative "validator_pool"
|
|
5
|
+
|
|
6
|
+
module PngConform
|
|
7
|
+
module Services
|
|
8
|
+
# Processes chunks through validation pipeline
|
|
9
|
+
#
|
|
10
|
+
# The ChunkProcessor handles:
|
|
11
|
+
# - Iterating through chunks from the reader
|
|
12
|
+
# - Creating validators via ChunkRegistry
|
|
13
|
+
# - Collecting validation results
|
|
14
|
+
# - Handling unknown chunk types
|
|
15
|
+
#
|
|
16
|
+
# This class extracts chunk processing logic from ValidationService
|
|
17
|
+
# following Single Responsibility Principle.
|
|
18
|
+
#
|
|
19
|
+
# Processes chunks through the validation pipeline.
#
# The ChunkProcessor handles:
# - Iterating through chunks from the reader
# - Creating validators via ChunkRegistry
# - Collecting validation results
# - Handling unknown chunk types
#
# This class extracts chunk processing logic from ValidationService
# following the Single Responsibility Principle.
class ChunkProcessor
  # Initialize chunk processor.
  #
  # @param reader [Object] File reader (StreamingReader or FullLoadReader)
  # @param context [ValidationContext] Validation context for state
  # @param options [Hash] CLI options for controlling behavior
  #   (:batch_enabled, :fail_fast, :max_per_type)
  def initialize(reader, context, options = {})
    @reader = reader
    @context = context
    @options = options
    # Only :max_per_type is relevant to pool sizing.
    @validator_pool = ValidatorPool.new(options.slice(:max_per_type))
  end

  # Process all chunks from the reader.
  #
  # Uses batch validation by default for performance (groups chunks by type).
  # Falls back to individual processing when options[:batch_enabled] == false.
  # Supports early termination when options[:fail_fast] is enabled.
  #
  # @yield [chunk] Optional block to receive chunks as they're processed
  # @return [void]
  def process(&block)
    if @options[:batch_enabled] == false
      process_individual(&block)
    else
      process_batch_inline(&block)
    end
  end

  private

  # Collect every chunk from the reader, group by type, then validate
  # each group. Grouping reduces validator instantiation overhead.
  #
  # NOTE: chunks are fully collected before validation, so fail_fast can
  # only stop between groups, not mid-read.
  #
  # @return [void]
  def process_batch_inline
    groups = Hash.new { |hash, type| hash[type] = [] }

    @reader.each_chunk do |chunk|
      groups[chunk.chunk_type.to_s] << chunk
      yield chunk if block_given?
    end

    groups.each do |chunk_type, chunks|
      validate_chunk_batch(chunk_type, chunks)
      # Early termination check after each batch.
      break if @options[:fail_fast] && @context.has_errors?
    end
  end

  # Validate chunks one at a time as they stream from the reader.
  #
  # @return [void]
  def process_individual
    @reader.each_chunk do |chunk|
      validate_chunk(chunk)
      yield chunk if block_given?

      break if @options[:fail_fast] && @context.has_errors?
    end
  end

  # Validate an externally supplied array of chunks, grouped by type.
  # (Appears unused within this class — presumably retained for callers
  # elsewhere; verify before removing.)
  #
  # @param chunks [Array] Array of chunks to validate
  # @return [void]
  def process_batch(chunks)
    chunks.group_by { |c| c.chunk_type.to_s }.each do |chunk_type, group|
      validate_chunk_batch(chunk_type, group)
    end
  end

  # Validate a single chunk.
  #
  # Looks up the validator class in ChunkRegistry, runs it via the pool,
  # and routes unregistered types to handle_unknown_chunk.
  #
  # @param chunk [Object] Chunk to validate
  # @return [void]
  def validate_chunk(chunk)
    chunk_type = chunk.chunk_type.to_s
    validator_class = Validators::ChunkRegistry.validator_for(chunk_type)

    if validator_class
      run_validator(chunk_type, validator_class, chunk)
    else
      handle_unknown_chunk(chunk)
    end

    # Mark chunk as seen AFTER validation so validators can detect
    # duplicates before this chunk is registered.
    @context.mark_chunk_seen(chunk_type, chunk)
  end

  # Validate a batch of chunks that share the same type.
  #
  # @param chunk_type [String] Type of chunks in batch
  # @param chunks [Array] Array of chunks of same type
  # @return [void]
  def validate_chunk_batch(chunk_type, chunks)
    validator_class = Validators::ChunkRegistry.validator_for(chunk_type)

    unless validator_class
      # No validator registered for this type — treat each as unknown.
      chunks.each do |chunk|
        handle_unknown_chunk(chunk)
        @context.mark_chunk_seen(chunk.chunk_type.to_s, chunk)
      end
      return
    end

    chunks.each do |chunk|
      run_validator(chunk_type, validator_class, chunk)
      @context.mark_chunk_seen(chunk_type, chunk)
    end
  end

  # Acquire a pooled validator, run it, and always return it to the pool.
  # Extracted to remove the duplicated acquire/validate/release sequence
  # that previously appeared in both validate_chunk and validate_chunk_batch.
  #
  # @param chunk_type [String] Chunk type key for the pool
  # @param validator_class [Class] Validator class from ChunkRegistry
  # @param chunk [Object] Chunk to validate
  # @return [void]
  def run_validator(chunk_type, validator_class, chunk)
    validator = @validator_pool.acquire(chunk_type, validator_class,
                                        chunk, @context)
    begin
      validator.validate
    ensure
      @validator_pool.release(chunk_type, validator)
    end
  end

  # Handle unknown chunk types.
  #
  # Per the PNG spec, bit 5 (0x20) of the first type byte distinguishes
  # ancillary (1) from critical (0) chunks:
  # - ancillary: safe to ignore, recorded as :info
  # - critical: must be recognized, recorded as :error
  #
  # @param chunk [Object] Unknown chunk
  # @return [void]
  def handle_unknown_chunk(chunk)
    chunk_type = chunk.chunk_type.to_s
    first_byte = chunk_type.bytes[0]

    if (first_byte & 0x20).zero?
      @context.add_error(
        chunk_type: chunk_type,
        message: "Unknown critical chunk type: #{chunk_type}",
        severity: :error,
        offset: chunk.abs_offset,
      )
    else
      @context.add_error(
        chunk_type: chunk_type,
        message: "Unknown ancillary chunk type: #{chunk_type} (ignored)",
        severity: :info,
        offset: chunk.abs_offset,
      )
    end
  end
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module PngConform
|
|
6
|
+
module Services
|
|
7
|
+
# File signature service for fast comparison
|
|
8
|
+
#
|
|
9
|
+
# Creates cryptographic signatures of PNG files to enable fast
|
|
10
|
+
# equality checking without full validation. This is particularly
|
|
11
|
+
# useful for comparison operations and caching.
|
|
12
|
+
#
|
|
13
|
+
# The signature is based on key file characteristics that are
|
|
14
|
+
# quick to compute but provide strong uniqueness guarantees.
|
|
15
|
+
#
|
|
16
|
+
# File signature service for fast comparison.
#
# Creates cryptographic signatures of PNG files to enable fast
# equality checking without full validation. This is particularly
# useful for comparison operations and caching.
#
# The signature is based on key file characteristics that are
# quick to compute but provide strong uniqueness guarantees.
class FileSignature
  attr_reader :result, :signature, :metadata

  class << self
    # Create signature from a ValidationResult.
    #
    # @param result [ValidationResult] Validation result to signature
    # @return [FileSignature] FileSignature instance
    def from_result(result)
      new(result).compute_signature
    end

    # Create signature directly from file metadata.
    #
    # @param file_path [String] Path to PNG file
    # @param _options [Hash] Options for signature creation (unused)
    # @return [FileSignature] FileSignature instance
    # @raise [ArgumentError] if the file does not exist or is not a PNG
    def from_file(file_path, _options = {})
      unless File.exist?(file_path)
        raise ArgumentError, "File not found: #{file_path}"
      end

      # Quick signature from file metadata without full validation.
      metadata = extract_quick_metadata(file_path)
      new(nil, metadata).compute_signature
    end

    private

    # Extract quick metadata from file without full validation.
    #
    # Reads just the PNG signature and chunk headers to create a
    # signature without loading the entire file into memory.
    #
    # NOTE: this is deliberately a private *class* method. It was
    # previously defined as a private instance method, which made
    # FileSignature.from_file raise NameError.
    #
    # @param file_path [String] Path to PNG file
    # @return [Hash] Quick metadata hash
    def extract_quick_metadata(file_path)
      file_size = File.size(file_path)

      File.open(file_path, "rb") do |file|
        # Verify PNG signature first.
        sig = file.read(8)
        expected_sig = [137, 80, 78, 71, 13, 10, 26, 10].pack("C*")
        unless sig == expected_sig
          raise ArgumentError, "Not a valid PNG file: #{file_path}"
        end

        chunk_count = 0
        chunk_types = []
        chunk_sizes = []

        # Scan chunk headers only; chunk data is skipped with seek.
        while file.pos < file_size
          length_bytes = file.read(4)
          break if length_bytes.nil? || length_bytes.length < 4

          chunk_length = length_bytes.unpack1("N")
          break if chunk_length > 10 * 1024 * 1024 # Sanity check

          type_bytes = file.read(4)
          break if type_bytes.nil? || type_bytes.length < 4

          # Skip chunk data.
          file.seek(chunk_length, IO::SEEK_CUR)

          # CRC (4 bytes) — read only to confirm the chunk is complete.
          crc_bytes = file.read(4)
          break if crc_bytes.nil? || crc_bytes.length < 4

          chunk_count += 1
          chunk_types << type_bytes
          chunk_sizes << chunk_length

          # Stop once IEND is reached.
          break if type_bytes == "IEND"
        end

        {
          file_size: file_size,
          chunk_count: chunk_count,
          chunk_types: chunk_types.sort,
          chunk_sizes: chunk_sizes.sort,
          crcs: [], # Not easily available without validation
        }
      end
    rescue StandardError
      # Fallback to basic metadata when the quick scan fails.
      {
        file_size: File.size(file_path),
        chunk_count: 0,
        chunk_types: [],
        chunk_sizes: [],
        crcs: [],
      }
    end
  end

  # Initialize with validation result or metadata hash.
  #
  # @param result [ValidationResult, nil] Validation result
  # @param metadata [Hash, nil] File metadata hash
  def initialize(result, metadata = nil)
    @result = result
    @metadata = metadata || extract_metadata_from_result
    @signature = nil
  end

  # Compute the signature.
  #
  # @return [FileSignature] Self for chaining
  def compute_signature
    @signature = generate_signature
    self
  end

  # Check if two signatures are equal.
  #
  # Accepts either another FileSignature or a raw hex signature string,
  # as documented (the previous implementation rejected strings).
  #
  # @param other [FileSignature, String] Another signature or signature string
  # @return [Boolean] True if signatures match
  def ==(other)
    case other
    when self.class then @signature == other.signature
    when String then @signature == other
    else false
    end
  end

  # Get signature as hex string.
  #
  # @return [String] Hex signature
  def to_hex
    @signature
  end

  # Get signature as bytes.
  #
  # @return [String] Signature bytes
  def to_bytes
    [@signature].pack("H*")
  end

  # Get short signature (first 8 bytes = 16 hex characters).
  #
  # @return [String] Short signature for quick comparison
  def short_signature
    @signature[0..15]
  end

  private

  # Extract metadata from the validation result.
  #
  # @return [Hash] Metadata hash (empty when no result is present)
  def extract_metadata_from_result
    return {} unless @result

    {
      file_size: @result.file_size,
      chunk_count: @result.chunks.count,
      chunk_types: @result.chunks.map(&:type).sort,
      chunk_sizes: @result.chunks.map(&:length).sort,
      crcs: @result.chunks.map do |c|
        c.crc_actual || c.crc_expected
      end.compact,
    }
  end

  # Generate signature from metadata.
  #
  # Uses SHA-256 on concatenated metadata for cryptographic strength.
  #
  # @return [String] Hex signature
  def generate_signature
    Digest::SHA256.hexdigest(signature_string)
  end

  # Build signature string from metadata.
  #
  # Creates a deterministic binary representation of key file attributes.
  # Uses a plain binary String buffer (String.new defaults to ASCII-8BIT)
  # instead of StringIO, which was used without being required.
  #
  # @return [String] Signature string
  def signature_string
    buffer = String.new

    # File size (4 bytes, big-endian; values above 2**32 wrap).
    buffer << [@metadata[:file_size]].pack("N")

    # Chunk count (4 bytes).
    buffer << [@metadata[:chunk_count]].pack("N")

    # Chunk types (pre-sorted for consistency), fixed 4-char each.
    @metadata[:chunk_types].each do |type|
      buffer << type.ljust(4, "\0")[0..3]
    end

    # Chunk sizes (pre-sorted for consistency).
    @metadata[:chunk_sizes].each do |size|
      buffer << [size].pack("N")
    end

    # CRC checksums (for integrity verification).
    @metadata[:crcs].each do |crc|
      buffer << [crc].pack("N")
    end

    buffer
  end
end
|
|
225
|
+
end
|
|
226
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PngConform
|
|
4
|
+
module Services
|
|
5
|
+
# File strategy service for reader selection
|
|
6
|
+
#
|
|
7
|
+
# Determines the optimal reader type based on file characteristics
|
|
8
|
+
# and validation options. This enables automatic reader selection for
|
|
9
|
+
# better performance and memory efficiency.
|
|
10
|
+
#
|
|
11
|
+
class FileStrategy
|
|
12
|
+
# Large file threshold (10MB) - files larger than this use streaming
|
|
13
|
+
LARGE_FILE_THRESHOLD = 10 * 1024 * 1024
|
|
14
|
+
|
|
15
|
+
class << self
|
|
16
|
+
# Determine the appropriate reader type for a file
|
|
17
|
+
#
|
|
18
|
+
# @param file_path [String] Path to the PNG file
|
|
19
|
+
# @param options [Hash] Validation options
|
|
20
|
+
# @return [Symbol] Reader type (:streaming or :full_load)
|
|
21
|
+
def reader_type_for(file_path, options = {})
|
|
22
|
+
file_size = File.size(file_path)
|
|
23
|
+
|
|
24
|
+
# Use streaming for large files unless explicitly forced
|
|
25
|
+
if file_size > LARGE_FILE_THRESHOLD && !options[:force_full]
|
|
26
|
+
:streaming
|
|
27
|
+
else
|
|
28
|
+
:full_load
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Check if a file is considered large
|
|
33
|
+
#
|
|
34
|
+
# @param file_path [String] Path to the PNG file
|
|
35
|
+
# @return [Boolean] True if file is large
|
|
36
|
+
def large_file?(file_path)
|
|
37
|
+
File.size(file_path) > LARGE_FILE_THRESHOLD
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Get the threshold for large file detection
|
|
41
|
+
#
|
|
42
|
+
# @return [Integer] Threshold in bytes
|
|
43
|
+
def large_file_threshold
|
|
44
|
+
LARGE_FILE_THRESHOLD
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Calculate recommended chunk size for processing
|
|
48
|
+
#
|
|
49
|
+
# For large files, smaller chunks are better for memory management
|
|
50
|
+
#
|
|
51
|
+
# @param file_path [String] Path to the PNG file
|
|
52
|
+
# @return [Integer] Recommended chunk size in bytes
|
|
53
|
+
def recommended_chunk_size(file_path)
|
|
54
|
+
file_size = File.size(file_path)
|
|
55
|
+
|
|
56
|
+
if file_size > 100 * 1024 * 1024 # > 100MB
|
|
57
|
+
8192 # 8KB chunks for very large files
|
|
58
|
+
elsif file_size > 10 * 1024 * 1024 # > 10MB
|
|
59
|
+
16384 # 16KB chunks for large files
|
|
60
|
+
else
|
|
61
|
+
65536 # 64KB chunks for normal files
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Estimate memory usage for full-load reader
|
|
66
|
+
#
|
|
67
|
+
# @param file_path [String] Path to the PNG file
|
|
68
|
+
# @return [Integer] Estimated memory in bytes
|
|
69
|
+
def estimate_memory_usage(file_path)
|
|
70
|
+
file_size = File.size(file_path)
|
|
71
|
+
# Estimate: file_size + overhead for chunks (rough estimate)
|
|
72
|
+
# Each chunk adds overhead for BinData structures
|
|
73
|
+
file_size * 1.2
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PngConform
|
|
4
|
+
module Services
|
|
5
|
+
# LRU (Least Recently Used) Cache implementation
|
|
6
|
+
#
|
|
7
|
+
# Provides efficient caching with automatic eviction of least recently
|
|
8
|
+
# used items when capacity is reached. Thread-safe for basic operations.
|
|
9
|
+
#
|
|
10
|
+
# LRU (Least Recently Used) Cache implementation.
#
# Provides efficient caching with automatic eviction of least recently
# used items when capacity is reached. When created with
# +thread_safe: true+, every operation is guarded by a single Mutex.
class LRUCache
  attr_reader :max_size, :current_size

  # Initialize LRU cache.
  #
  # @param max_size [Integer] Maximum number of items to store
  # @param options [Hash] :thread_safe => true guards operations with a Mutex
  def initialize(max_size, options = {})
    @max_size = max_size
    @cache = {}
    @order = [] # Tracks access order (most recent at end)
    @current_size = 0
    @thread_safe = options[:thread_safe] || false
    @mutex = @thread_safe ? Mutex.new : nil
  end

  # Get value for key, promoting it to most-recently-used.
  #
  # The previous implementation pre-checked key presence under the mutex
  # and then read @cache[key] outside it; all access now happens inside
  # a single synchronized section.
  #
  # @param key [Object] Cache key
  # @return [Object, nil] Cached value or nil if not found
  def [](key)
    with_synchronization do
      return nil unless @cache.key?(key)

      # Move to end (most recently used).
      @order.delete(key)
      @order.push(key)

      @cache[key]
    end
  end

  # Set value for key, evicting the least recently used entry when
  # capacity is exceeded.
  #
  # @param key [Object] Cache key
  # @param value [Object] Value to cache
  # @return [Object] The cached value
  def []=(key, value)
    with_synchronization do
      # Remove existing key if updating (to re-insert at end).
      @order.delete(key) if @cache.key?(key)

      @cache[key] = value
      @order.push(key)

      # Evict oldest if over capacity.
      if @order.size > @max_size
        oldest = @order.shift
        @cache.delete(oldest)
      end

      @current_size = @cache.size
    end

    value
  end

  # Check if key exists (does not affect LRU order).
  #
  # @param key [Object] Cache key
  # @return [Boolean] True if key exists in cache
  def key?(key)
    with_synchronization { @cache.key?(key) }
  end

  # Check if cache is empty.
  #
  # @return [Boolean] True if cache has no items
  def empty?
    with_synchronization { @cache.empty? }
  end

  # Clear all cached items.
  #
  # @return [void]
  def clear
    with_synchronization do
      @cache.clear
      @order.clear
      @current_size = 0
    end
  end

  # Get all keys.
  #
  # @return [Array<Object>] All cached keys (in LRU order, oldest first)
  def keys
    with_synchronization { @order.dup } # Copy to avoid external modification
  end

  # Get cache statistics.
  #
  # @return [Hash] Cache statistics (:size, :max_size, :usage_percent)
  def stats
    with_synchronization do
      {
        size: @cache.size,
        max_size: @max_size,
        usage_percent: ((@cache.size.to_f / @max_size) * 100).round(1),
      }
    end
  end

  # Delete a specific key.
  #
  # Fixed to actually return the deleted value as documented — the
  # previous implementation returned the new cache size because the
  # size assignment was the block's last expression.
  #
  # @param key [Object] Cache key to delete
  # @return [Object, nil] Deleted value or nil if not found
  def delete(key)
    with_synchronization do
      @order.delete(key)
      removed = @cache.delete(key)
      @current_size = @cache.size
      removed
    end
  end

  # Peek at value without affecting LRU order.
  #
  # @param key [Object] Cache key
  # @return [Object, nil] Cached value or nil if not found
  def peek(key)
    with_synchronization { @cache[key] }
  end

  # Get most recently used item.
  #
  # @return [Array, nil] [key, value] or nil if empty
  def mru
    with_synchronization do
      return nil if @order.empty?

      key = @order.last
      [key, @cache[key]]
    end
  end

  # Get least recently used item.
  #
  # @return [Array, nil] [key, value] or nil if empty
  def lru
    with_synchronization do
      return nil if @order.empty?

      key = @order.first
      [key, @cache[key]]
    end
  end

  private

  # Execute block, holding the mutex when thread safety was requested.
  #
  # @yield Block to execute
  # @return [Object] Block result
  def with_synchronization(&block)
    if @thread_safe && @mutex
      @mutex.synchronize(&block)
    else
      yield
    end
  end
end
|
|
169
|
+
end
|
|
170
|
+
end
|