i18n-context-generator 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/i18n_context_generator/cli.rb +6 -1
- data/lib/i18n_context_generator/config.rb +45 -26
- data/lib/i18n_context_generator/context_extractor/run_logging.rb +26 -0
- data/lib/i18n_context_generator/context_extractor/source_filters.rb +58 -0
- data/lib/i18n_context_generator/context_extractor.rb +74 -11
- data/lib/i18n_context_generator/git_diff.rb +66 -3
- data/lib/i18n_context_generator/searcher/source_discovery.rb +187 -0
- data/lib/i18n_context_generator/searcher.rb +12 -0
- data/lib/i18n_context_generator/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8e8761f1cd9b514b6d8de047f23dd3439770039e5c96d639468a3be93db28730
|
|
4
|
+
data.tar.gz: 739ded1c82976fcbd7756a8363a6e667a6bce6641c0544b0cee5482c16355b73
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0bc26c4fe73b9fc8e17d273ce6f471e23f5febde0419ed8eb8c02570a36c945a287e95c85af15ebf02d8a001ee95d46e950a7b4129e4f4f9bf5e483f205b3371
|
|
7
|
+
data.tar.gz: 934d026c57e651fa81ae2c436c49ea41bfcf8498c1ce888090c022a82837f31a21d6cb669c4d22a39ebc2e9cee4ac5ae5d578bf26b1978703ad1112a7030762e
|
|
@@ -36,6 +36,8 @@ module I18nContextGenerator
|
|
|
36
36
|
option :provider, aliases: '-p', enum: %w[anthropic openai], desc: 'LLM provider (default: anthropic)'
|
|
37
37
|
option :model, aliases: '-m', desc: 'LLM model to use'
|
|
38
38
|
option :keys, aliases: '-k', desc: 'Filter keys (comma-separated patterns, supports * wildcard)'
|
|
39
|
+
option :discovery_mode, type: :string, enum: %w[auto translations source],
|
|
40
|
+
desc: 'How to discover entries: auto, translations, or source (default: auto)'
|
|
39
41
|
option :concurrency, type: :numeric, desc: 'Number of concurrent requests (default: 5)'
|
|
40
42
|
option :dry_run, type: :boolean, desc: 'Show what would be processed without calling LLM'
|
|
41
43
|
option :cache, type: :boolean, desc: 'Enable caching of LLM results'
|
|
@@ -101,8 +103,9 @@ module I18nContextGenerator
|
|
|
101
103
|
return if options[:config] && File.exist?(options[:config])
|
|
102
104
|
|
|
103
105
|
return if options[:translations]
|
|
106
|
+
return if options[:discovery_mode] == 'source' && options[:source]
|
|
104
107
|
|
|
105
|
-
say_error 'Error: --translations (-t) is required unless using a config file'
|
|
108
|
+
say_error 'Error: --translations (-t) is required unless using a config file or --discovery-mode source with --source'
|
|
106
109
|
exit 1
|
|
107
110
|
end
|
|
108
111
|
|
|
@@ -182,6 +185,8 @@ module I18nContextGenerator
|
|
|
182
185
|
|
|
183
186
|
# Processing options
|
|
184
187
|
processing:
|
|
188
|
+
# Discovery mode: auto, translations, or source
|
|
189
|
+
discovery_mode: auto
|
|
185
190
|
concurrency: 5
|
|
186
191
|
context_lines: 15
|
|
187
192
|
max_matches_per_key: 3
|
|
@@ -3,42 +3,44 @@
|
|
|
3
3
|
module I18nContextGenerator
|
|
4
4
|
# Holds all configuration for an extraction run, loaded from YAML config files and/or CLI options.
|
|
5
5
|
class Config
|
|
6
|
-
attr_reader :translations, :source_paths, :ignore_patterns,
|
|
6
|
+
attr_reader :translations, :source_paths, :source_line_filter, :ignore_patterns,
|
|
7
7
|
:provider, :model, :concurrency, :context_lines,
|
|
8
8
|
:max_matches_per_key, :output_path, :output_format,
|
|
9
9
|
:no_cache, :dry_run, :key_filter, :write_back,
|
|
10
10
|
:swift_functions, :write_back_to_code, :diff_base, :context_prefix,
|
|
11
11
|
:context_mode, :start_key, :end_key, :include_file_paths,
|
|
12
|
-
:include_translation_comments, :redact_prompts
|
|
12
|
+
:include_translation_comments, :redact_prompts, :discovery_mode
|
|
13
13
|
|
|
14
14
|
DEFAULT_CONTEXT_PREFIX = 'Context: '
|
|
15
15
|
DEFAULT_CONTEXT_MODE = 'replace' # "replace" or "append"
|
|
16
16
|
|
|
17
17
|
def initialize(**attrs)
|
|
18
|
-
@translations = attrs
|
|
19
|
-
@source_paths = attrs
|
|
20
|
-
@
|
|
21
|
-
@
|
|
22
|
-
@
|
|
23
|
-
@
|
|
24
|
-
@
|
|
25
|
-
@
|
|
26
|
-
@
|
|
27
|
-
@
|
|
28
|
-
@
|
|
29
|
-
@
|
|
30
|
-
@
|
|
31
|
-
@
|
|
32
|
-
@
|
|
33
|
-
@
|
|
34
|
-
@
|
|
35
|
-
@
|
|
36
|
-
@
|
|
37
|
-
@
|
|
38
|
-
@
|
|
39
|
-
@
|
|
40
|
-
@
|
|
41
|
-
@
|
|
18
|
+
@translations = fetch_defaulting_value(attrs, :translations, [])
|
|
19
|
+
@source_paths = fetch_defaulting_value(attrs, :source_paths, ['.'])
|
|
20
|
+
@source_line_filter = fetch_config_value(attrs, :source_line_filter, nil)
|
|
21
|
+
@ignore_patterns = fetch_defaulting_value(attrs, :ignore_patterns, [])
|
|
22
|
+
@provider = fetch_defaulting_value(attrs, :provider, 'anthropic')
|
|
23
|
+
@model = fetch_config_value(attrs, :model, nil)
|
|
24
|
+
@concurrency = fetch_defaulting_value(attrs, :concurrency, 5)
|
|
25
|
+
@context_lines = fetch_defaulting_value(attrs, :context_lines, 15)
|
|
26
|
+
@max_matches_per_key = fetch_defaulting_value(attrs, :max_matches_per_key, 3)
|
|
27
|
+
@output_path = fetch_config_value(attrs, :output_path, nil)
|
|
28
|
+
@output_format = fetch_defaulting_value(attrs, :output_format, 'csv')
|
|
29
|
+
@no_cache = fetch_boolean_value(attrs, :no_cache, true)
|
|
30
|
+
@dry_run = fetch_boolean_value(attrs, :dry_run, false)
|
|
31
|
+
@key_filter = fetch_config_value(attrs, :key_filter, nil)
|
|
32
|
+
@write_back = fetch_boolean_value(attrs, :write_back, false)
|
|
33
|
+
@write_back_to_code = fetch_boolean_value(attrs, :write_back_to_code, false)
|
|
34
|
+
@swift_functions = fetch_defaulting_value(attrs, :swift_functions, default_swift_functions)
|
|
35
|
+
@diff_base = fetch_config_value(attrs, :diff_base, nil)
|
|
36
|
+
@context_prefix = fetch_defaulting_value(attrs, :context_prefix, DEFAULT_CONTEXT_PREFIX)
|
|
37
|
+
@context_mode = fetch_defaulting_value(attrs, :context_mode, DEFAULT_CONTEXT_MODE)
|
|
38
|
+
@start_key = fetch_config_value(attrs, :start_key, nil)
|
|
39
|
+
@end_key = fetch_config_value(attrs, :end_key, nil)
|
|
40
|
+
@include_file_paths = fetch_boolean_value(attrs, :include_file_paths, false)
|
|
41
|
+
@include_translation_comments = fetch_boolean_value(attrs, :include_translation_comments, true)
|
|
42
|
+
@redact_prompts = fetch_boolean_value(attrs, :redact_prompts, true)
|
|
43
|
+
@discovery_mode = fetch_defaulting_value(attrs, :discovery_mode, 'auto')
|
|
42
44
|
end
|
|
43
45
|
|
|
44
46
|
def default_swift_functions
|
|
@@ -65,6 +67,7 @@ module I18nContextGenerator
|
|
|
65
67
|
concurrency: yaml.dig('processing', 'concurrency') || 5,
|
|
66
68
|
context_lines: yaml.dig('processing', 'context_lines') || 15,
|
|
67
69
|
max_matches_per_key: yaml.dig('processing', 'max_matches_per_key') || 3,
|
|
70
|
+
discovery_mode: yaml.dig('processing', 'discovery_mode') || 'auto',
|
|
68
71
|
output_path: yaml.dig('output', 'path'),
|
|
69
72
|
output_format: yaml.dig('output', 'format') || 'csv',
|
|
70
73
|
write_back: yaml.dig('output', 'write_back') || false,
|
|
@@ -105,6 +108,7 @@ module I18nContextGenerator
|
|
|
105
108
|
concurrency: options[:concurrency] || 5,
|
|
106
109
|
context_lines: 15,
|
|
107
110
|
max_matches_per_key: 3,
|
|
111
|
+
discovery_mode: options[:discovery_mode] || 'auto',
|
|
108
112
|
output_path: options[:output],
|
|
109
113
|
output_format: options[:format] || 'csv',
|
|
110
114
|
no_cache: options[:cache].nil? || !options[:cache],
|
|
@@ -170,6 +174,20 @@ module I18nContextGenerator
|
|
|
170
174
|
|
|
171
175
|
private
|
|
172
176
|
|
|
177
|
+
def fetch_config_value(attrs, key, default)
|
|
178
|
+
attrs.key?(key) ? attrs[key] : default
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
def fetch_defaulting_value(attrs, key, default)
|
|
182
|
+
attrs.key?(key) ? (attrs[key] || default) : default
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def fetch_boolean_value(attrs, key, default)
|
|
186
|
+
return default unless attrs.key?(key)
|
|
187
|
+
|
|
188
|
+
attrs[key].nil? ? default : attrs[key]
|
|
189
|
+
end
|
|
190
|
+
|
|
173
191
|
def merge_cli_scalar_options(options)
|
|
174
192
|
scalar_mappings = {
|
|
175
193
|
key_filter: :keys,
|
|
@@ -178,6 +196,7 @@ module I18nContextGenerator
|
|
|
178
196
|
provider: :provider,
|
|
179
197
|
model: :model,
|
|
180
198
|
concurrency: :concurrency,
|
|
199
|
+
discovery_mode: :discovery_mode,
|
|
181
200
|
diff_base: :diff_base,
|
|
182
201
|
context_prefix: :context_prefix,
|
|
183
202
|
context_mode: :context_mode,
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module I18nContextGenerator
|
|
4
|
+
class ContextExtractor
|
|
5
|
+
# Small logging helpers to keep the main extractor focused on orchestration.
|
|
6
|
+
module RunLogging
|
|
7
|
+
private
|
|
8
|
+
|
|
9
|
+
def log_empty_entries_message
|
|
10
|
+
if @config.diff_base && translation_backed_discovery?
|
|
11
|
+
puts "No changed translation keys found since #{@config.diff_base}."
|
|
12
|
+
elsif @config.diff_base && source_discovery_filtered_by_diff?
|
|
13
|
+
puts "No changed source localization entries found since #{@config.diff_base}."
|
|
14
|
+
else
|
|
15
|
+
puts "No #{entry_label_for_logging} found."
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def log_loaded_entries(count)
|
|
20
|
+
puts "Loaded #{count} #{entry_label_for_logging}"
|
|
21
|
+
puts "(filtered to changes since #{@config.diff_base})" if @config.diff_base && translation_backed_discovery?
|
|
22
|
+
puts "(filtered to source changes since #{@config.diff_base})" if source_discovery_filtered_by_diff?
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module I18nContextGenerator
|
|
4
|
+
class ContextExtractor
|
|
5
|
+
# Helpers for filtering source-discovered entries without narrowing the full
|
|
6
|
+
# source search scope used later for context lookup.
|
|
7
|
+
module SourceFilters
|
|
8
|
+
private
|
|
9
|
+
|
|
10
|
+
def source_discovery?
|
|
11
|
+
!translation_backed_discovery?
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def source_discovery_filtered_by_diff?
|
|
15
|
+
source_discovery? && @config.diff_base && @config.source_line_filter.nil?
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def filter_source_entries(entries)
|
|
19
|
+
line_filter = source_line_filter
|
|
20
|
+
return entries if line_filter.nil?
|
|
21
|
+
return [] if line_filter.empty?
|
|
22
|
+
|
|
23
|
+
entries.select do |entry|
|
|
24
|
+
line_filter.fetch(entry.file.to_s, Set.new).include?(entry.line)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def source_line_filter
|
|
29
|
+
return @source_line_filter if defined?(@source_line_filter)
|
|
30
|
+
|
|
31
|
+
@source_line_filter =
|
|
32
|
+
if @config.source_line_filter
|
|
33
|
+
normalize_source_line_filter(@config.source_line_filter)
|
|
34
|
+
elsif @config.diff_base && source_discovery?
|
|
35
|
+
GitDiff.new(base_ref: @config.diff_base).changed_lines(@config.source_paths)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def normalize_source_line_filter(filter)
|
|
40
|
+
filter.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |(file, lines), normalized_filter|
|
|
41
|
+
next if file.nil?
|
|
42
|
+
|
|
43
|
+
normalized_lines = Array(lines).filter_map { |line| Integer(line, exception: false) }
|
|
44
|
+
next if normalized_lines.empty?
|
|
45
|
+
|
|
46
|
+
normalized_filter[file.to_s].merge(normalized_lines)
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def result_locations_for(entry, matches)
|
|
51
|
+
source_location = entry.metadata&.dig(:source_location)
|
|
52
|
+
return [source_location] if source_location
|
|
53
|
+
|
|
54
|
+
matches.map { |m| "#{m.file}:#{m.line}" }
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'context_extractor/source_filters'
|
|
4
|
+
require_relative 'context_extractor/run_logging'
|
|
5
|
+
|
|
3
6
|
module I18nContextGenerator
|
|
4
7
|
# Main orchestrator that parses translation files, searches source code for usages,
|
|
5
8
|
# sends context to the LLM, and writes results via the configured writer.
|
|
6
9
|
class ContextExtractor
|
|
7
10
|
include Writers::Helpers
|
|
11
|
+
include SourceFilters
|
|
12
|
+
include RunLogging
|
|
8
13
|
|
|
9
14
|
# Result for a single translation key
|
|
10
15
|
ExtractionResult = Data.define(:key, :text, :description, :source_file, :ui_element, :tone,
|
|
@@ -45,22 +50,17 @@ module I18nContextGenerator
|
|
|
45
50
|
def run
|
|
46
51
|
PlatformValidator.new(@config).validate!
|
|
47
52
|
|
|
48
|
-
entries =
|
|
53
|
+
entries = load_entries
|
|
49
54
|
entries = filter_entries(entries) if @config.key_filter
|
|
50
|
-
entries = filter_by_diff(entries) if @config.diff_base
|
|
55
|
+
entries = filter_by_diff(entries) if @config.diff_base && translation_backed_discovery?
|
|
51
56
|
entries = filter_by_range(entries) if @config.start_key || @config.end_key
|
|
52
57
|
|
|
53
58
|
if entries.empty?
|
|
54
|
-
|
|
55
|
-
puts "No changed translation keys found since #{@config.diff_base}."
|
|
56
|
-
else
|
|
57
|
-
puts 'No translation entries found.'
|
|
58
|
-
end
|
|
59
|
+
log_empty_entries_message
|
|
59
60
|
return
|
|
60
61
|
end
|
|
61
62
|
|
|
62
|
-
|
|
63
|
-
puts "(filtered to changes since #{@config.diff_base})" if @config.diff_base
|
|
63
|
+
log_loaded_entries(entries.size)
|
|
64
64
|
|
|
65
65
|
if @config.dry_run
|
|
66
66
|
puts "\nDry run - would process these keys:"
|
|
@@ -102,7 +102,9 @@ module I18nContextGenerator
|
|
|
102
102
|
end
|
|
103
103
|
|
|
104
104
|
def load_translations
|
|
105
|
-
@
|
|
105
|
+
return @load_translations if defined?(@load_translations)
|
|
106
|
+
|
|
107
|
+
@load_translations = @config.translations.uniq.flat_map do |path|
|
|
106
108
|
unless File.exist?(path)
|
|
107
109
|
warn "Translation file not found: #{path}"
|
|
108
110
|
next []
|
|
@@ -113,6 +115,67 @@ module I18nContextGenerator
|
|
|
113
115
|
end
|
|
114
116
|
end
|
|
115
117
|
|
|
118
|
+
def load_entries
|
|
119
|
+
case normalized_discovery_mode
|
|
120
|
+
when 'source'
|
|
121
|
+
load_source_entries
|
|
122
|
+
when 'translations'
|
|
123
|
+
load_translations
|
|
124
|
+
else
|
|
125
|
+
auto_discovery_entries
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def auto_discovery_entries
|
|
130
|
+
return load_source_entries if @config.translations.empty?
|
|
131
|
+
|
|
132
|
+
load_translations
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def load_source_entries
|
|
136
|
+
translation_lookup = load_translation_lookup
|
|
137
|
+
discovered_entries = filter_source_entries(searcher.discover_localization_entries)
|
|
138
|
+
|
|
139
|
+
discovered_entries.map do |entry|
|
|
140
|
+
hydrated_entry = translation_lookup[entry.key]
|
|
141
|
+
translation_comment = hydrated_entry&.metadata&.dig(:comment)
|
|
142
|
+
source_comment = entry.comment
|
|
143
|
+
metadata = {}
|
|
144
|
+
metadata[:comment] = translation_comment || source_comment if translation_comment || source_comment
|
|
145
|
+
metadata[:source_location] = "#{entry.file}:#{entry.line}"
|
|
146
|
+
|
|
147
|
+
Parsers::TranslationEntry.new(
|
|
148
|
+
key: entry.key,
|
|
149
|
+
text: hydrated_entry&.text || entry.text || entry.key,
|
|
150
|
+
source_file: hydrated_entry&.source_file,
|
|
151
|
+
metadata: metadata
|
|
152
|
+
)
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def load_translation_lookup
|
|
157
|
+
return @load_translation_lookup if defined?(@load_translation_lookup)
|
|
158
|
+
|
|
159
|
+
@load_translation_lookup = load_translations.each_with_object({}) do |entry, lookup|
|
|
160
|
+
lookup[entry.key] ||= entry
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
def normalized_discovery_mode
|
|
165
|
+
@config.discovery_mode.to_s.downcase
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def translation_backed_discovery?
|
|
169
|
+
return true if normalized_discovery_mode == 'translations'
|
|
170
|
+
return false if normalized_discovery_mode == 'source'
|
|
171
|
+
|
|
172
|
+
@config.translations.any?
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def entry_label_for_logging
|
|
176
|
+
translation_backed_discovery? ? 'translation keys' : 'source localization entries'
|
|
177
|
+
end
|
|
178
|
+
|
|
116
179
|
def filter_entries(entries)
|
|
117
180
|
patterns = @config.key_filter.split(',').map do |pattern|
|
|
118
181
|
escaped = Regexp.escape(pattern.strip).gsub('\*', '.*')
|
|
@@ -277,7 +340,7 @@ module I18nContextGenerator
|
|
|
277
340
|
ui_element: llm_result.ui_element,
|
|
278
341
|
tone: llm_result.tone,
|
|
279
342
|
max_length: llm_result.max_length,
|
|
280
|
-
locations: matches
|
|
343
|
+
locations: result_locations_for(entry, matches),
|
|
281
344
|
error: llm_result.error
|
|
282
345
|
)
|
|
283
346
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'open3'
|
|
4
|
+
require 'pathname'
|
|
4
5
|
|
|
5
6
|
module I18nContextGenerator
|
|
6
7
|
# Parses git diff to extract changed translation keys
|
|
@@ -18,7 +19,7 @@ module I18nContextGenerator
|
|
|
18
19
|
translation_paths.each do |path|
|
|
19
20
|
next unless File.exist?(path)
|
|
20
21
|
|
|
21
|
-
diff_output =
|
|
22
|
+
diff_output = git_diff_for_path(path)
|
|
22
23
|
next if diff_output.empty?
|
|
23
24
|
|
|
24
25
|
keys.merge(extract_keys_from_diff(diff_output, path))
|
|
@@ -27,9 +28,23 @@ module I18nContextGenerator
|
|
|
27
28
|
keys
|
|
28
29
|
end
|
|
29
30
|
|
|
31
|
+
# Get changed line numbers in source files since the base ref.
|
|
32
|
+
# @param source_paths [Array<String>] paths to source files or directories
|
|
33
|
+
# @return [Hash{String => Set<Integer>}] changed line numbers keyed by file path
|
|
34
|
+
def changed_lines(source_paths)
|
|
35
|
+
source_paths.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |path, line_map|
|
|
36
|
+
next unless File.exist?(path)
|
|
37
|
+
|
|
38
|
+
diff_output = git_diff_for_path(path)
|
|
39
|
+
next if diff_output.empty?
|
|
40
|
+
|
|
41
|
+
merge_line_maps!(line_map, extract_changed_lines(diff_output, path))
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
30
45
|
# Check if we're in a git repository
|
|
31
46
|
def self.available?
|
|
32
|
-
system('git rev-parse --git-dir
|
|
47
|
+
system('git', 'rev-parse', '--git-dir', out: File::NULL, err: File::NULL)
|
|
33
48
|
end
|
|
34
49
|
|
|
35
50
|
# Check if the base ref exists
|
|
@@ -39,7 +54,7 @@ module I18nContextGenerator
|
|
|
39
54
|
|
|
40
55
|
private
|
|
41
56
|
|
|
42
|
-
def
|
|
57
|
+
def git_diff_for_path(path)
|
|
43
58
|
# Run git from the directory containing the file so the correct repo is used
|
|
44
59
|
dir = File.directory?(path) ? path : File.dirname(path)
|
|
45
60
|
pathspec = File.directory?(path) ? '.' : File.basename(path)
|
|
@@ -48,6 +63,54 @@ module I18nContextGenerator
|
|
|
48
63
|
status.success? ? stdout : ''
|
|
49
64
|
end
|
|
50
65
|
|
|
66
|
+
def extract_changed_lines(diff_output, path)
|
|
67
|
+
changed_lines = Hash.new { |h, k| h[k] = Set.new }
|
|
68
|
+
current_file = File.file?(path) ? path : nil
|
|
69
|
+
file_line = nil
|
|
70
|
+
|
|
71
|
+
diff_output.each_line do |line|
|
|
72
|
+
if (match = line.match(%r{^\+\+\+ b/(.+)$}))
|
|
73
|
+
current_file = resolve_diff_file_path(path, match[1])
|
|
74
|
+
next
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
if (hunk = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/))
|
|
78
|
+
file_line = hunk[1].to_i
|
|
79
|
+
next
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
next if line.start_with?('diff ', 'index ', '--- ', '+++ ', '\\')
|
|
83
|
+
next if file_line.nil? || current_file.nil?
|
|
84
|
+
|
|
85
|
+
if line.start_with?('+')
|
|
86
|
+
changed_lines[current_file] << file_line
|
|
87
|
+
file_line += 1
|
|
88
|
+
elsif line.start_with?('-')
|
|
89
|
+
next
|
|
90
|
+
else
|
|
91
|
+
file_line += 1
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
changed_lines
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def resolve_diff_file_path(path, diff_file_path)
|
|
99
|
+
return Pathname.new(path).cleanpath.to_s if File.file?(path)
|
|
100
|
+
|
|
101
|
+
normalized_path = path.to_s.sub(%r{/\z}, '')
|
|
102
|
+
return Pathname.new(diff_file_path).cleanpath.to_s if normalized_path.empty? || normalized_path == '.'
|
|
103
|
+
return Pathname.new(diff_file_path).cleanpath.to_s if diff_file_path == normalized_path || diff_file_path.start_with?("#{normalized_path}/")
|
|
104
|
+
|
|
105
|
+
Pathname.new(File.join(normalized_path, diff_file_path)).cleanpath.to_s
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def merge_line_maps!(target, source)
|
|
109
|
+
source.each do |file, lines|
|
|
110
|
+
target[file].merge(lines)
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
51
114
|
def extract_keys_from_diff(diff_output, path)
|
|
52
115
|
ext = File.extname(path).downcase
|
|
53
116
|
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module I18nContextGenerator
|
|
4
|
+
class Searcher
|
|
5
|
+
# Source-discovery helpers used for source-first extraction runs.
|
|
6
|
+
module SourceDiscovery
|
|
7
|
+
IOS_SINGLE_LINE_DISCOVERY_PATTERNS = [
|
|
8
|
+
/NSLocalizedString\s*\(\s*@?["'](?<key>[^"']+)["'](?:.*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
|
|
9
|
+
/String\s*\(\s*localized:\s*["'](?<key>[^"']+)["'](?:.*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
|
|
10
|
+
/LocalizedStringKey\s*\(\s*["'](?<key>[^"']+)["']\s*\)/,
|
|
11
|
+
/:\s*LocalizedStringKey\s*=\s*["'](?<key>[^"']+)["']/,
|
|
12
|
+
/Text\s*\(\s*["'](?<key>[^"']+)["']/,
|
|
13
|
+
/["'](?<key>[^"']+)["']\.localized\b/
|
|
14
|
+
].freeze
|
|
15
|
+
|
|
16
|
+
IOS_MULTILINE_DISCOVERY_PATTERNS = [
|
|
17
|
+
/NSLocalizedString\s*\(\s*@?["'](?<key>[^"']+)["'](?:(?:(?!\)\s*[),]?)[\s\S])*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
|
|
18
|
+
/String\s*\(\s*localized:\s*["'](?<key>[^"']+)["'](?:(?:(?!\)\s*[),]?)[\s\S])*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
|
|
19
|
+
/Text\s*\(\s*LocalizedStringKey\s*\(\s*["'](?<key>[^"']+)["']\s*\)\s*\)/
|
|
20
|
+
].freeze
|
|
21
|
+
|
|
22
|
+
ANDROID_DISCOVERY_PATTERN = %r{
|
|
23
|
+
R\.string\.(\w+)\b|
|
|
24
|
+
@string/([\w.]+)\b|
|
|
25
|
+
[(\s,=]string\.(\w+)\b
|
|
26
|
+
}x
|
|
27
|
+
|
|
28
|
+
def discover_localization_entries
|
|
29
|
+
entries = discover_files.flat_map do |file|
|
|
30
|
+
discover_entries_in_file(file)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
deduplicate_discovered_entries(entries)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
private
|
|
37
|
+
|
|
38
|
+
def discover_entries_in_file(file)
|
|
39
|
+
case platform_for_file(file)
|
|
40
|
+
when :ios
|
|
41
|
+
discover_ios_entries(file)
|
|
42
|
+
when :android
|
|
43
|
+
discover_android_entries(file)
|
|
44
|
+
else
|
|
45
|
+
[]
|
|
46
|
+
end
|
|
47
|
+
rescue Errno::ENOENT, Errno::EACCES, Errno::EISDIR => e
|
|
48
|
+
warn "Warning: Could not read #{file}: #{e.message}" if $VERBOSE
|
|
49
|
+
[]
|
|
50
|
+
rescue ArgumentError => e
|
|
51
|
+
return [] if e.message.include?('invalid byte sequence')
|
|
52
|
+
|
|
53
|
+
raise
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def deduplicate_discovered_entries(entries)
|
|
57
|
+
entries.each_with_object({}) do |entry, deduplicated_entries|
|
|
58
|
+
existing_entry = deduplicated_entries[entry.key]
|
|
59
|
+
if existing_entry.nil?
|
|
60
|
+
deduplicated_entries[entry.key] = entry
|
|
61
|
+
next
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
next if !existing_entry.comment.to_s.empty? || entry.comment.to_s.empty?
|
|
65
|
+
|
|
66
|
+
deduplicated_entries[entry.key] = entry
|
|
67
|
+
end.values
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def platform_for_file(file)
|
|
71
|
+
case File.extname(file).downcase
|
|
72
|
+
when '.swift', '.m', '.mm', '.h'
|
|
73
|
+
:ios
|
|
74
|
+
when '.kt', '.java'
|
|
75
|
+
:android
|
|
76
|
+
when '.xml'
|
|
77
|
+
file.include?('/res/') ? :android : nil
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def discover_ios_entries(file)
|
|
82
|
+
lines = cached_file_lines(file)
|
|
83
|
+
entries = []
|
|
84
|
+
index = 0
|
|
85
|
+
|
|
86
|
+
while index < lines.length
|
|
87
|
+
line = lines[index]
|
|
88
|
+
next_index, discovered_entry = extract_ios_entry(lines, file, index, line)
|
|
89
|
+
entries << discovered_entry if discovered_entry
|
|
90
|
+
index = next_index || (index + 1)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
entries
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def extract_ios_entry(lines, file, index, line)
|
|
97
|
+
if (entry = extract_ios_single_line_entry(file, index, line))
|
|
98
|
+
return [index + 1, entry]
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
return extract_ios_multiline_entry(lines, file, index) if IOS_FUNCTION_OPENERS.any? { |opener| opener.match?(line) }
|
|
102
|
+
|
|
103
|
+
[index + 1, nil]
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def extract_ios_single_line_entry(file, index, line)
|
|
107
|
+
pattern = IOS_SINGLE_LINE_DISCOVERY_PATTERNS.find { |candidate| candidate.match?(line) }
|
|
108
|
+
return unless pattern
|
|
109
|
+
|
|
110
|
+
build_ios_discovered_entry(file, index, pattern.match(line))
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def extract_ios_multiline_entry(lines, file, start_index, lookahead: 8)
|
|
114
|
+
end_index = [lines.length - 1, start_index + lookahead].min
|
|
115
|
+
snippet = lines[start_index..end_index].join("\n")
|
|
116
|
+
pattern = IOS_MULTILINE_DISCOVERY_PATTERNS.find { |candidate| candidate.match?(snippet) }
|
|
117
|
+
return [start_index + 1, nil] unless pattern
|
|
118
|
+
|
|
119
|
+
match = pattern.match(snippet)
|
|
120
|
+
key_line_index = locate_key_line(lines, start_index, end_index, match[:key])
|
|
121
|
+
|
|
122
|
+
[
|
|
123
|
+
(key_line_index || start_index) + 1,
|
|
124
|
+
build_ios_discovered_entry(file, key_line_index || start_index, match)
|
|
125
|
+
]
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def build_ios_discovered_entry(file, index, match)
|
|
129
|
+
key = match[:key]
|
|
130
|
+
return if key.nil? || key.empty?
|
|
131
|
+
|
|
132
|
+
text = match.names.include?('text') ? match[:text] : nil
|
|
133
|
+
comment = match.names.include?('comment') ? unescape_source_string(match[:comment]) : nil
|
|
134
|
+
text = unescape_source_string(text) if text
|
|
135
|
+
|
|
136
|
+
DiscoveredLocalization.new(
|
|
137
|
+
key: key,
|
|
138
|
+
file: file,
|
|
139
|
+
line: index + 1,
|
|
140
|
+
text: text,
|
|
141
|
+
comment: comment
|
|
142
|
+
)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def locate_key_line(lines, start_index, end_index, key)
|
|
146
|
+
(start_index..end_index).find do |index|
|
|
147
|
+
lines[index].include?("\"#{key}\"") || lines[index].include?("@\"#{key}\"")
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
def discover_android_entries(file)
|
|
152
|
+
lines = cached_file_lines(file)
|
|
153
|
+
entries = []
|
|
154
|
+
|
|
155
|
+
lines.each_with_index do |line, index|
|
|
156
|
+
entries.concat(extract_android_entries_from_line(file, index, line))
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
entries
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def extract_android_entries_from_line(file, index, line)
|
|
163
|
+
line.scan(ANDROID_DISCOVERY_PATTERN).filter_map do |captures|
|
|
164
|
+
key = captures.compact.first
|
|
165
|
+
next if key.nil? || key.empty?
|
|
166
|
+
|
|
167
|
+
DiscoveredLocalization.new(
|
|
168
|
+
key: key,
|
|
169
|
+
file: file,
|
|
170
|
+
line: index + 1
|
|
171
|
+
)
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def unescape_source_string(text)
|
|
176
|
+
return if text.nil?
|
|
177
|
+
|
|
178
|
+
text
|
|
179
|
+
.gsub('\\"', '"')
|
|
180
|
+
.gsub("\\'", "'")
|
|
181
|
+
.gsub('\\\\', '\\')
|
|
182
|
+
.gsub('\\n', "\n")
|
|
183
|
+
.gsub('\\t', "\t")
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'find'
|
|
4
|
+
require_relative 'searcher/source_discovery'
|
|
4
5
|
|
|
5
6
|
module I18nContextGenerator
|
|
6
7
|
# Finds where translation keys are used in iOS and Android source code.
|
|
7
8
|
class Searcher
|
|
9
|
+
include SourceDiscovery
|
|
10
|
+
|
|
8
11
|
# Represents a code match with surrounding context
|
|
9
12
|
Match = Data.define(:file, :line, :match_line, :context, :enclosing_scope) do
|
|
10
13
|
def initialize(file:, line:, match_line: '', context: '', enclosing_scope: nil)
|
|
@@ -12,6 +15,13 @@ module I18nContextGenerator
|
|
|
12
15
|
end
|
|
13
16
|
end
|
|
14
17
|
|
|
18
|
+
# Represents a localization entry discovered directly from source code.
|
|
19
|
+
DiscoveredLocalization = Data.define(:key, :file, :line, :text, :comment) do
|
|
20
|
+
def initialize(key:, file:, line:, text: nil, comment: nil)
|
|
21
|
+
super
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
15
25
|
# Patterns that indicate false positive matches (not actual localization usage)
|
|
16
26
|
FALSE_POSITIVE_PATTERNS = [
|
|
17
27
|
/==\s*["']/, # String comparisons like == "yes"
|
|
@@ -367,6 +377,8 @@ module I18nContextGenerator
|
|
|
367
377
|
"String\\s*\\(\\s*localized:\\s*[\"']#{escaped}[\"']",
|
|
368
378
|
# LocalizedStringKey("key") - SwiftUI
|
|
369
379
|
"LocalizedStringKey\\s*\\(\\s*[\"']#{escaped}[\"']",
|
|
380
|
+
# Direct assignment to a LocalizedStringKey-typed value
|
|
381
|
+
"LocalizedStringKey\\s*=\\s*[\"']#{escaped}[\"']",
|
|
370
382
|
# Text("key") - SwiftUI (when using localized strings)
|
|
371
383
|
"Text\\s*\\(\\s*[\"']#{escaped}[\"']",
|
|
372
384
|
# .localized extension pattern
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: i18n-context-generator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Automattic
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: concurrent-ruby
|
|
@@ -152,6 +152,8 @@ files:
|
|
|
152
152
|
- lib/i18n_context_generator/cli.rb
|
|
153
153
|
- lib/i18n_context_generator/config.rb
|
|
154
154
|
- lib/i18n_context_generator/context_extractor.rb
|
|
155
|
+
- lib/i18n_context_generator/context_extractor/run_logging.rb
|
|
156
|
+
- lib/i18n_context_generator/context_extractor/source_filters.rb
|
|
155
157
|
- lib/i18n_context_generator/git_diff.rb
|
|
156
158
|
- lib/i18n_context_generator/llm/anthropic.rb
|
|
157
159
|
- lib/i18n_context_generator/llm/client.rb
|
|
@@ -163,6 +165,7 @@ files:
|
|
|
163
165
|
- lib/i18n_context_generator/parsers/yaml_parser.rb
|
|
164
166
|
- lib/i18n_context_generator/platform_validator.rb
|
|
165
167
|
- lib/i18n_context_generator/searcher.rb
|
|
168
|
+
- lib/i18n_context_generator/searcher/source_discovery.rb
|
|
166
169
|
- lib/i18n_context_generator/version.rb
|
|
167
170
|
- lib/i18n_context_generator/writers/android_xml_writer.rb
|
|
168
171
|
- lib/i18n_context_generator/writers/csv_writer.rb
|