i18n-context-generator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bfe865e8057b9ad9e5a239beca369736c430b118ba138ebbb2696be2d1c64af3
4
- data.tar.gz: 7669dbee58f5030654eb40115db899d650b3bcfa379738c48242dbac2a59e0dc
3
+ metadata.gz: 8e8761f1cd9b514b6d8de047f23dd3439770039e5c96d639468a3be93db28730
4
+ data.tar.gz: 739ded1c82976fcbd7756a8363a6e667a6bce6641c0544b0cee5482c16355b73
5
5
  SHA512:
6
- metadata.gz: 6c544f04ecabc25b2d6e2e196f1aeb7b0d0dd0bfa17be8ad57bef46e02d4e4741ffd2c6187d7e36acbccdedd9826a963d3ef4fb70825a988b55b11b76c030465
7
- data.tar.gz: 8878af77894c57dbdc169bed6ae982889698b3121dca6bbc59c0af9099c520e272145ef9d1324d184dba9af5cf218d7cb7b9965b4369b8ac3a3c55d0a4e461c1
6
+ metadata.gz: 0bc26c4fe73b9fc8e17d273ce6f471e23f5febde0419ed8eb8c02570a36c945a287e95c85af15ebf02d8a001ee95d46e950a7b4129e4f4f9bf5e483f205b3371
7
+ data.tar.gz: 934d026c57e651fa81ae2c436c49ea41bfcf8498c1ce888090c022a82837f31a21d6cb669c4d22a39ebc2e9cee4ac5ae5d578bf26b1978703ad1112a7030762e
@@ -36,6 +36,8 @@ module I18nContextGenerator
36
36
  option :provider, aliases: '-p', enum: %w[anthropic openai], desc: 'LLM provider (default: anthropic)'
37
37
  option :model, aliases: '-m', desc: 'LLM model to use'
38
38
  option :keys, aliases: '-k', desc: 'Filter keys (comma-separated patterns, supports * wildcard)'
39
+ option :discovery_mode, type: :string, enum: %w[auto translations source],
40
+ desc: 'How to discover entries: auto, translations, or source (default: auto)'
39
41
  option :concurrency, type: :numeric, desc: 'Number of concurrent requests (default: 5)'
40
42
  option :dry_run, type: :boolean, desc: 'Show what would be processed without calling LLM'
41
43
  option :cache, type: :boolean, desc: 'Enable caching of LLM results'
@@ -101,8 +103,9 @@ module I18nContextGenerator
101
103
  return if options[:config] && File.exist?(options[:config])
102
104
 
103
105
  return if options[:translations]
106
+ return if options[:discovery_mode] == 'source' && options[:source]
104
107
 
105
- say_error 'Error: --translations (-t) is required unless using a config file'
108
+ say_error 'Error: --translations (-t) is required unless using a config file or --discovery-mode source with --source'
106
109
  exit 1
107
110
  end
108
111
 
@@ -182,6 +185,8 @@ module I18nContextGenerator
182
185
 
183
186
  # Processing options
184
187
  processing:
188
+ # Discovery mode: auto, translations, or source
189
+ discovery_mode: auto
185
190
  concurrency: 5
186
191
  context_lines: 15
187
192
  max_matches_per_key: 3
@@ -3,42 +3,44 @@
3
3
  module I18nContextGenerator
4
4
  # Holds all configuration for an extraction run, loaded from YAML config files and/or CLI options.
5
5
  class Config
6
- attr_reader :translations, :source_paths, :ignore_patterns,
6
+ attr_reader :translations, :source_paths, :source_line_filter, :ignore_patterns,
7
7
  :provider, :model, :concurrency, :context_lines,
8
8
  :max_matches_per_key, :output_path, :output_format,
9
9
  :no_cache, :dry_run, :key_filter, :write_back,
10
10
  :swift_functions, :write_back_to_code, :diff_base, :context_prefix,
11
11
  :context_mode, :start_key, :end_key, :include_file_paths,
12
- :include_translation_comments, :redact_prompts
12
+ :include_translation_comments, :redact_prompts, :discovery_mode
13
13
 
14
14
  DEFAULT_CONTEXT_PREFIX = 'Context: '
15
15
  DEFAULT_CONTEXT_MODE = 'replace' # "replace" or "append"
16
16
 
17
17
  def initialize(**attrs)
18
- @translations = attrs[:translations] || []
19
- @source_paths = attrs[:source_paths] || ['.']
20
- @ignore_patterns = attrs[:ignore_patterns] || []
21
- @provider = attrs[:provider] || 'anthropic'
22
- @model = attrs[:model]
23
- @concurrency = attrs[:concurrency] || 5
24
- @context_lines = attrs[:context_lines] || 15
25
- @max_matches_per_key = attrs[:max_matches_per_key] || 3
26
- @output_path = attrs.key?(:output_path) ? attrs[:output_path] : nil
27
- @output_format = attrs[:output_format] || 'csv'
28
- @no_cache = attrs.key?(:no_cache) ? attrs[:no_cache] : true
29
- @dry_run = attrs[:dry_run] || false
30
- @key_filter = attrs[:key_filter]
31
- @write_back = attrs[:write_back] || false
32
- @write_back_to_code = attrs[:write_back_to_code] || false
33
- @swift_functions = attrs[:swift_functions] || default_swift_functions
34
- @diff_base = attrs[:diff_base]
35
- @context_prefix = attrs.key?(:context_prefix) ? attrs[:context_prefix] : DEFAULT_CONTEXT_PREFIX
36
- @context_mode = attrs[:context_mode] || DEFAULT_CONTEXT_MODE
37
- @start_key = attrs[:start_key]
38
- @end_key = attrs[:end_key]
39
- @include_file_paths = attrs.key?(:include_file_paths) ? attrs[:include_file_paths] : false
40
- @include_translation_comments = attrs.key?(:include_translation_comments) ? attrs[:include_translation_comments] : true
41
- @redact_prompts = attrs.key?(:redact_prompts) ? attrs[:redact_prompts] : true
18
+ @translations = fetch_defaulting_value(attrs, :translations, [])
19
+ @source_paths = fetch_defaulting_value(attrs, :source_paths, ['.'])
20
+ @source_line_filter = fetch_config_value(attrs, :source_line_filter, nil)
21
+ @ignore_patterns = fetch_defaulting_value(attrs, :ignore_patterns, [])
22
+ @provider = fetch_defaulting_value(attrs, :provider, 'anthropic')
23
+ @model = fetch_config_value(attrs, :model, nil)
24
+ @concurrency = fetch_defaulting_value(attrs, :concurrency, 5)
25
+ @context_lines = fetch_defaulting_value(attrs, :context_lines, 15)
26
+ @max_matches_per_key = fetch_defaulting_value(attrs, :max_matches_per_key, 3)
27
+ @output_path = fetch_config_value(attrs, :output_path, nil)
28
+ @output_format = fetch_defaulting_value(attrs, :output_format, 'csv')
29
+ @no_cache = fetch_boolean_value(attrs, :no_cache, true)
30
+ @dry_run = fetch_boolean_value(attrs, :dry_run, false)
31
+ @key_filter = fetch_config_value(attrs, :key_filter, nil)
32
+ @write_back = fetch_boolean_value(attrs, :write_back, false)
33
+ @write_back_to_code = fetch_boolean_value(attrs, :write_back_to_code, false)
34
+ @swift_functions = fetch_defaulting_value(attrs, :swift_functions, default_swift_functions)
35
+ @diff_base = fetch_config_value(attrs, :diff_base, nil)
36
+ @context_prefix = fetch_defaulting_value(attrs, :context_prefix, DEFAULT_CONTEXT_PREFIX)
37
+ @context_mode = fetch_defaulting_value(attrs, :context_mode, DEFAULT_CONTEXT_MODE)
38
+ @start_key = fetch_config_value(attrs, :start_key, nil)
39
+ @end_key = fetch_config_value(attrs, :end_key, nil)
40
+ @include_file_paths = fetch_boolean_value(attrs, :include_file_paths, false)
41
+ @include_translation_comments = fetch_boolean_value(attrs, :include_translation_comments, true)
42
+ @redact_prompts = fetch_boolean_value(attrs, :redact_prompts, true)
43
+ @discovery_mode = fetch_defaulting_value(attrs, :discovery_mode, 'auto')
42
44
  end
43
45
 
44
46
  def default_swift_functions
@@ -65,6 +67,7 @@ module I18nContextGenerator
65
67
  concurrency: yaml.dig('processing', 'concurrency') || 5,
66
68
  context_lines: yaml.dig('processing', 'context_lines') || 15,
67
69
  max_matches_per_key: yaml.dig('processing', 'max_matches_per_key') || 3,
70
+ discovery_mode: yaml.dig('processing', 'discovery_mode') || 'auto',
68
71
  output_path: yaml.dig('output', 'path'),
69
72
  output_format: yaml.dig('output', 'format') || 'csv',
70
73
  write_back: yaml.dig('output', 'write_back') || false,
@@ -105,6 +108,7 @@ module I18nContextGenerator
105
108
  concurrency: options[:concurrency] || 5,
106
109
  context_lines: 15,
107
110
  max_matches_per_key: 3,
111
+ discovery_mode: options[:discovery_mode] || 'auto',
108
112
  output_path: options[:output],
109
113
  output_format: options[:format] || 'csv',
110
114
  no_cache: options[:cache].nil? || !options[:cache],
@@ -170,6 +174,20 @@ module I18nContextGenerator
170
174
 
171
175
  private
172
176
 
177
# Looks up +key+ in +attrs+ and returns the stored value exactly as given
# (nil and false included). Falls back to +default+ only when the key is absent.
#
# @param attrs [Hash] attribute hash passed to the initializer
# @param key [Symbol] attribute name
# @param default [Object] value used when +key+ is not present
# @return [Object]
def fetch_config_value(attrs, key, default)
  attrs.fetch(key, default)
end
180
+
181
# Looks up +key+ in +attrs+, substituting +default+ when the key is absent or
# when the stored value is nil or false.
#
# @param attrs [Hash] attribute hash passed to the initializer
# @param key [Symbol] attribute name
# @param default [Object] fallback value
# @return [Object]
def fetch_defaulting_value(attrs, key, default)
  return default unless attrs.key?(key)

  attrs[key] || default
end
184
+
185
# Looks up a flag in +attrs+. An explicit false is preserved; only a missing
# key or a nil value yields +default+.
#
# @param attrs [Hash] attribute hash passed to the initializer
# @param key [Symbol] attribute name
# @param default [Boolean] value used when the flag is absent or nil
# @return [Object] the stored value, or +default+
def fetch_boolean_value(attrs, key, default)
  stored = attrs.fetch(key, nil)
  stored.nil? ? default : stored
end
190
+
173
191
  def merge_cli_scalar_options(options)
174
192
  scalar_mappings = {
175
193
  key_filter: :keys,
@@ -178,6 +196,7 @@ module I18nContextGenerator
178
196
  provider: :provider,
179
197
  model: :model,
180
198
  concurrency: :concurrency,
199
+ discovery_mode: :discovery_mode,
181
200
  diff_base: :diff_base,
182
201
  context_prefix: :context_prefix,
183
202
  context_mode: :context_mode,
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module I18nContextGenerator
  class ContextExtractor
    # Console-reporting helpers for an extraction run.
    #
    # The including object must provide @config (responding to #diff_base) and
    # the predicates/helpers #translation_backed_discovery?,
    # #source_discovery_filtered_by_diff? and #entry_label_for_logging.
    module RunLogging
      private

      # Prints one line explaining why there is nothing to process. The wording
      # depends on whether a diff base narrowed the run and on which discovery
      # path produced the (empty) entry list.
      def log_empty_entries_message
        base = @config.diff_base

        message =
          if base && translation_backed_discovery?
            "No changed translation keys found since #{base}."
          elsif base && source_discovery_filtered_by_diff?
            "No changed source localization entries found since #{base}."
          else
            "No #{entry_label_for_logging} found."
          end

        puts message
      end

      # Prints how many entries were loaded, followed by a note for each diff
      # filter that applied.
      #
      # @param count [Integer] number of entries about to be processed
      def log_loaded_entries(count)
        base = @config.diff_base

        puts "Loaded #{count} #{entry_label_for_logging}"
        puts "(filtered to changes since #{base})" if base && translation_backed_discovery?
        puts "(filtered to source changes since #{base})" if source_discovery_filtered_by_diff?
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
require 'pathname'

module I18nContextGenerator
  class ContextExtractor
    # Helpers for filtering source-discovered entries without narrowing the full
    # source search scope used later for context lookup.
    #
    # The including object must provide @config (responding to #diff_base,
    # #source_line_filter and #source_paths) and #translation_backed_discovery?.
    module SourceFilters
      private

      # @return [Boolean] true when entries come from source code rather than
      #   from translation files
      def source_discovery?
        !translation_backed_discovery?
      end

      # Truthy when source discovery is narrowed by a git diff, i.e. a diff base
      # is configured and no explicit line filter overrides it.
      #
      # @return [Object, nil, false] truthy/falsy; not coerced to a strict boolean
      def source_discovery_filtered_by_diff?
        source_discovery? && @config.diff_base && @config.source_line_filter.nil?
      end

      # Keeps only the entries whose file and line appear in the active line
      # filter. File paths are compared in cleaned form so that equivalent
      # spellings such as "./a/b.swift" and "a/b.swift" match.
      #
      # @param entries [Array<#file, #line>] discovered localization entries
      # @return [Array] +entries+ unchanged when no filter is active, an empty
      #   array when the filter exists but selects nothing, otherwise the
      #   matching subset
      def filter_source_entries(entries)
        line_filter = source_line_filter
        return entries if line_filter.nil?
        return [] if line_filter.empty?

        entries.select do |entry|
          # fetch (not []) so the lookup never inserts keys via the default block
          line_filter.fetch(clean_source_path(entry.file), Set.new).include?(entry.line)
        end
      end

      # Memoized line filter: the explicitly configured filter when present,
      # otherwise the git-changed lines for source discovery with a diff base,
      # otherwise nil (no filtering).
      #
      # @return [Hash{String => Set<Integer>}, nil]
      def source_line_filter
        return @source_line_filter if defined?(@source_line_filter)

        @source_line_filter =
          if @config.source_line_filter
            normalize_source_line_filter(@config.source_line_filter)
          elsif @config.diff_base && source_discovery?
            GitDiff.new(base_ref: @config.diff_base).changed_lines(@config.source_paths)
          end
      end

      # Converts a user-supplied filter into cleaned path keys mapped to integer
      # line sets. Entries with a nil file or without any integer-like line are
      # dropped; keys that clean to the same path are merged.
      #
      # @param filter [#each] pairs of file and line (or list of lines)
      # @return [Hash{String => Set<Integer>}]
      def normalize_source_line_filter(filter)
        filter.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |(file, lines), normalized_filter|
          next if file.nil?

          normalized_lines = Array(lines).filter_map { |line| Integer(line, exception: false) }
          next if normalized_lines.empty?

          normalized_filter[clean_source_path(file)].merge(normalized_lines)
        end
      end

      # Canonical textual form of a path (no "./" prefix, duplicate slashes or
      # "." segments) so filter keys and entry files compare consistently.
      #
      # @param file [#to_s]
      # @return [String]
      def clean_source_path(file)
        Pathname.new(file.to_s).cleanpath.to_s
      end

      # Locations reported for a result: the entry's own recorded source
      # location when it has one, otherwise every search match as "file:line".
      #
      # @param entry [#metadata]
      # @param matches [Array<#file, #line>]
      # @return [Array<String>]
      def result_locations_for(entry, matches)
        source_location = entry.metadata&.dig(:source_location)
        return [source_location] if source_location

        matches.map { |m| "#{m.file}:#{m.line}" }
      end
    end
  end
end
@@ -1,10 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'context_extractor/source_filters'
4
+ require_relative 'context_extractor/run_logging'
5
+
3
6
  module I18nContextGenerator
4
7
  # Main orchestrator that parses translation files, searches source code for usages,
5
8
  # sends context to the LLM, and writes results via the configured writer.
6
9
  class ContextExtractor
7
10
  include Writers::Helpers
11
+ include SourceFilters
12
+ include RunLogging
8
13
 
9
14
  # Result for a single translation key
10
15
  ExtractionResult = Data.define(:key, :text, :description, :source_file, :ui_element, :tone,
@@ -45,22 +50,17 @@ module I18nContextGenerator
45
50
  def run
46
51
  PlatformValidator.new(@config).validate!
47
52
 
48
- entries = load_translations
53
+ entries = load_entries
49
54
  entries = filter_entries(entries) if @config.key_filter
50
- entries = filter_by_diff(entries) if @config.diff_base
55
+ entries = filter_by_diff(entries) if @config.diff_base && translation_backed_discovery?
51
56
  entries = filter_by_range(entries) if @config.start_key || @config.end_key
52
57
 
53
58
  if entries.empty?
54
- if @config.diff_base
55
- puts "No changed translation keys found since #{@config.diff_base}."
56
- else
57
- puts 'No translation entries found.'
58
- end
59
+ log_empty_entries_message
59
60
  return
60
61
  end
61
62
 
62
- puts "Loaded #{entries.size} translation keys"
63
- puts "(filtered to changes since #{@config.diff_base})" if @config.diff_base
63
+ log_loaded_entries(entries.size)
64
64
 
65
65
  if @config.dry_run
66
66
  puts "\nDry run - would process these keys:"
@@ -102,7 +102,9 @@ module I18nContextGenerator
102
102
  end
103
103
 
104
104
  def load_translations
105
- @config.translations.uniq.flat_map do |path|
105
+ return @load_translations if defined?(@load_translations)
106
+
107
+ @load_translations = @config.translations.uniq.flat_map do |path|
106
108
  unless File.exist?(path)
107
109
  warn "Translation file not found: #{path}"
108
110
  next []
@@ -113,6 +115,67 @@ module I18nContextGenerator
113
115
  end
114
116
  end
115
117
 
118
# Loads the entries to process according to the normalized discovery mode:
# 'source' reads them from source code, 'translations' from translation files,
# and any other value defers to the automatic choice.
def load_entries
  mode = normalized_discovery_mode
  return load_source_entries if mode == 'source'
  return load_translations if mode == 'translations'

  auto_discovery_entries
end
128
+
129
# Automatic discovery: use source code when no translation files are
# configured, otherwise load the translation files.
def auto_discovery_entries
  @config.translations.empty? ? load_source_entries : load_translations
end
134
+
135
# Builds translation entries from localization calls discovered in source code.
#
# Each discovered entry is enriched from the translation lookup when its key is
# known there: the translation's text and comment take precedence over the
# source text/comment, and the key itself is the last-resort text. The
# discovery position is always recorded under metadata[:source_location].
#
# @return [Array<Parsers::TranslationEntry>]
def load_source_entries
  known_translations = load_translation_lookup
  discovered = filter_source_entries(searcher.discover_localization_entries)

  discovered.map do |found|
    translated = known_translations[found.key]
    from_translation = translated&.metadata&.dig(:comment)
    from_source = found.comment
    comment = from_translation || from_source

    # Insert :comment first so key order matches when both keys are present.
    metadata = comment ? { comment: comment } : {}
    metadata[:source_location] = "#{found.file}:#{found.line}"

    Parsers::TranslationEntry.new(
      key: found.key,
      text: translated&.text || found.text || found.key,
      source_file: translated&.source_file,
      metadata: metadata
    )
  end
end
155
+
156
# Memoized index of loaded translation entries by key. When a key occurs more
# than once, the first entry encountered is the one kept.
#
# @return [Hash{Object => Object}] entry key => entry
def load_translation_lookup
  return @load_translation_lookup if defined?(@load_translation_lookup)

  first_per_key = load_translations.uniq(&:key)
  @load_translation_lookup = first_per_key.to_h { |entry| [entry.key, entry] }
end
163
+
164
# Configured discovery mode as a lowercase string; nil becomes "".
#
# @return [String]
def normalized_discovery_mode
  configured_mode = @config.discovery_mode
  configured_mode.to_s.downcase
end
167
+
168
# Whether the entries for this run come from translation files.
#
# 'translations' mode always does, 'source' mode never does, and any other
# mode does exactly when at least one translation path is configured.
#
# @return [Boolean]
def translation_backed_discovery?
  case normalized_discovery_mode
  when 'translations' then true
  when 'source' then false
  else @config.translations.any?
  end
end
174
+
175
# Noun phrase used in log output for the kind of entries being processed.
#
# @return [String]
def entry_label_for_logging
  return 'translation keys' if translation_backed_discovery?

  'source localization entries'
end
178
+
116
179
  def filter_entries(entries)
117
180
  patterns = @config.key_filter.split(',').map do |pattern|
118
181
  escaped = Regexp.escape(pattern.strip).gsub('\*', '.*')
@@ -277,7 +340,7 @@ module I18nContextGenerator
277
340
  ui_element: llm_result.ui_element,
278
341
  tone: llm_result.tone,
279
342
  max_length: llm_result.max_length,
280
- locations: matches.map { |m| "#{m.file}:#{m.line}" },
343
+ locations: result_locations_for(entry, matches),
281
344
  error: llm_result.error
282
345
  )
283
346
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'open3'
4
+ require 'pathname'
4
5
 
5
6
  module I18nContextGenerator
6
7
  # Parses git diff to extract changed translation keys
@@ -18,7 +19,7 @@ module I18nContextGenerator
18
19
  translation_paths.each do |path|
19
20
  next unless File.exist?(path)
20
21
 
21
- diff_output = git_diff_for_file(path)
22
+ diff_output = git_diff_for_path(path)
22
23
  next if diff_output.empty?
23
24
 
24
25
  keys.merge(extract_keys_from_diff(diff_output, path))
@@ -27,9 +28,23 @@ module I18nContextGenerator
27
28
  keys
28
29
  end
29
30
 
31
# Collects the line numbers added or modified since the base ref, across all
# given source paths. Paths that do not exist, or whose diff is empty, are
# skipped.
# @param source_paths [Array<String>] paths to source files or directories
# @return [Hash{String => Set<Integer>}] changed line numbers keyed by file path
def changed_lines(source_paths)
  line_map = Hash.new { |hash, file| hash[file] = Set.new }

  source_paths.each do |path|
    next unless File.exist?(path)

    diff_output = git_diff_for_path(path)
    merge_line_maps!(line_map, extract_changed_lines(diff_output, path)) unless diff_output.empty?
  end

  line_map
end
44
+
30
45
  # Check if we're in a git repository
31
46
  def self.available?
32
- system('git rev-parse --git-dir > /dev/null 2>&1')
47
+ system('git', 'rev-parse', '--git-dir', out: File::NULL, err: File::NULL)
33
48
  end
34
49
 
35
50
  # Check if the base ref exists
@@ -39,7 +54,7 @@ module I18nContextGenerator
39
54
 
40
55
  private
41
56
 
42
- def git_diff_for_file(path)
57
+ def git_diff_for_path(path)
43
58
  # Run git from the directory containing the file so the correct repo is used
44
59
  dir = File.directory?(path) ? path : File.dirname(path)
45
60
  pathspec = File.directory?(path) ? '.' : File.basename(path)
@@ -48,6 +63,54 @@ module I18nContextGenerator
48
63
  status.success? ? stdout : ''
49
64
  end
50
65
 
66
# Parses unified diff output and returns, per file, the new-side line numbers
# of every added line.
#
# Hunk bodies are consumed using the line counts from the "@@ -a,b +c,d @@"
# header, so the content of a hunk is classified purely by its first
# character. This keeps an added line whose text begins with "++ " (rendered
# as "+++ ..." in the diff) from being mistaken for a file header, which would
# otherwise drop it and shift every later line number in that hunk.
#
# @param diff_output [String] output of git diff for +path+
# @param path [String] the file or directory the diff was produced for
# @return [Hash{String => Set<Integer>}] added line numbers keyed by the path
#   returned from resolve_diff_file_path
def extract_changed_lines(diff_output, path)
  changed_lines = Hash.new { |h, k| h[k] = Set.new }
  current_file = File.file?(path) ? path : nil
  file_line = nil
  old_remaining = 0 # old-side lines still expected in the current hunk
  new_remaining = 0 # new-side lines still expected in the current hunk

  diff_output.each_line do |line|
    if old_remaining.positive? || new_remaining.positive?
      case line[0]
      when '+'
        changed_lines[current_file] << file_line if current_file
        file_line += 1
        new_remaining -= 1
      when '-'
        old_remaining -= 1
      when '\\'
        # "\ No newline at end of file" marker: belongs to neither side
        next
      else
        # context line: present on both sides
        file_line += 1
        old_remaining -= 1
        new_remaining -= 1
      end
      next
    end

    # git appends a TAB after the path when it contains spaces; keep it out of the key
    if (match = line.match(%r{^\+\+\+ b/(.+?)\t?$}))
      current_file = resolve_diff_file_path(path, match[1])
    elsif (hunk = line.match(/^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/))
      file_line = hunk[2].to_i
      # an omitted count in a hunk header means 1
      old_remaining = (hunk[1] || 1).to_i
      new_remaining = (hunk[3] || 1).to_i
    end
  end

  changed_lines
end
97
+
98
# Maps a path printed in a diff header to the cleaned path used as a key in
# the changed-line map.
#
# When +path+ is an existing regular file the diff can only concern that file,
# so +path+ itself is returned. For a directory, the diff path is used as-is
# when the directory is empty/"." or already prefixes it; otherwise the diff
# path is joined under the directory.
#
# @param path [String] file or directory the diff was requested for
# @param diff_file_path [String] path taken from the "+++ b/" diff line
# @return [String] path cleaned with Pathname#cleanpath
def resolve_diff_file_path(path, diff_file_path)
  clean = ->(candidate) { Pathname.new(candidate).cleanpath.to_s }
  return clean.call(path) if File.file?(path)

  base_dir = path.to_s.delete_suffix('/')
  already_rooted = base_dir.empty? || base_dir == '.' ||
                   diff_file_path == base_dir || diff_file_path.start_with?("#{base_dir}/")

  clean.call(already_rooted ? diff_file_path : File.join(base_dir, diff_file_path))
end
107
+
108
# Adds every line set from +source+ into the set stored under the same file in
# +target+, mutating +target+ in place. +target+ is indexed with [] and must
# yield a Set for files it has not seen yet.
#
# @param target [Hash{String => Set<Integer>}] accumulator
# @param source [Hash{String => Set<Integer>}] line map to fold in
def merge_line_maps!(target, source)
  source.each { |file_path, line_numbers| target[file_path].merge(line_numbers) }
end
113
+
51
114
  def extract_keys_from_diff(diff_output, path)
52
115
  ext = File.extname(path).downcase
53
116
 
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
module I18nContextGenerator
  class Searcher
    # Source-discovery helpers used for source-first extraction runs.
    #
    # The including class must provide #discover_files and
    # #cached_file_lines(file); the IOS_FUNCTION_OPENERS and
    # DiscoveredLocalization constants are also expected to be resolvable from
    # here, as none of these are defined in this module.
    module SourceDiscovery
      # Patterns for localization calls that fit on one line. Each exposes a
      # +key+ capture and, where the call form carries one, a +comment+ capture.
      IOS_SINGLE_LINE_DISCOVERY_PATTERNS = [
        /NSLocalizedString\s*\(\s*@?["'](?<key>[^"']+)["'](?:.*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
        /String\s*\(\s*localized:\s*["'](?<key>[^"']+)["'](?:.*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
        /LocalizedStringKey\s*\(\s*["'](?<key>[^"']+)["']\s*\)/,
        /:\s*LocalizedStringKey\s*=\s*["'](?<key>[^"']+)["']/,
        /Text\s*\(\s*["'](?<key>[^"']+)["']/,
        /["'](?<key>[^"']+)["']\.localized\b/
      ].freeze

      # Patterns applied to a multi-line snippet when a call opens on one line
      # and its arguments continue on the following lines.
      IOS_MULTILINE_DISCOVERY_PATTERNS = [
        /NSLocalizedString\s*\(\s*@?["'](?<key>[^"']+)["'](?:(?:(?!\)\s*[),]?)[\s\S])*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
        /String\s*\(\s*localized:\s*["'](?<key>[^"']+)["'](?:(?:(?!\)\s*[),]?)[\s\S])*?comment:\s*["'](?<comment>(?:\\.|[^"'\\])*)["'])?/,
        /Text\s*\(\s*LocalizedStringKey\s*\(\s*["'](?<key>[^"']+)["']\s*\)\s*\)/
      ].freeze

      # Android string-resource references; exactly one of the three capture
      # groups is set per match.
      ANDROID_DISCOVERY_PATTERN = %r{
        R\.string\.(\w+)\b|
        @string/([\w.]+)\b|
        [(\s,=]string\.(\w+)\b
      }x

      # Escape character (the one following the backslash) => replacement.
      SOURCE_STRING_ESCAPES = {
        '"' => '"',
        "'" => "'",
        '\\' => '\\',
        'n' => "\n",
        't' => "\t"
      }.freeze

      # Scans every file returned by #discover_files for localization usages.
      #
      # @return [Array<DiscoveredLocalization>] one entry per key
      def discover_localization_entries
        entries = discover_files.flat_map do |file|
          discover_entries_in_file(file)
        end

        deduplicate_discovered_entries(entries)
      end

      private

      # Dispatches to the platform-specific scanner. Unreadable files and files
      # with invalid byte sequences yield no entries instead of aborting the run.
      def discover_entries_in_file(file)
        case platform_for_file(file)
        when :ios
          discover_ios_entries(file)
        when :android
          discover_android_entries(file)
        else
          []
        end
      rescue Errno::ENOENT, Errno::EACCES, Errno::EISDIR => e
        warn "Warning: Could not read #{file}: #{e.message}" if $VERBOSE
        []
      rescue ArgumentError => e
        return [] if e.message.include?('invalid byte sequence')

        raise
      end

      # Keeps one entry per key: the first occurrence, unless it has no comment
      # and a later occurrence does, in which case that later one replaces it.
      def deduplicate_discovered_entries(entries)
        entries.each_with_object({}) do |entry, deduplicated_entries|
          existing_entry = deduplicated_entries[entry.key]
          if existing_entry.nil?
            deduplicated_entries[entry.key] = entry
            next
          end

          next if !existing_entry.comment.to_s.empty? || entry.comment.to_s.empty?

          deduplicated_entries[entry.key] = entry
        end.values
      end

      # @return [Symbol, nil] :ios, :android, or nil for unsupported files.
      #   XML files count as Android only when their path contains "/res/".
      def platform_for_file(file)
        case File.extname(file).downcase
        when '.swift', '.m', '.mm', '.h'
          :ios
        when '.kt', '.java'
          :android
        when '.xml'
          file.include?('/res/') ? :android : nil
        end
      end

      # Walks the file's lines; the index advance is decided by the extractor so
      # lines consumed as part of a multi-line call are not rescanned.
      def discover_ios_entries(file)
        lines = cached_file_lines(file)
        entries = []
        index = 0

        while index < lines.length
          line = lines[index]
          next_index, discovered_entry = extract_ios_entry(lines, file, index, line)
          entries << discovered_entry if discovered_entry
          index = next_index || (index + 1)
        end

        entries
      end

      # @return [Array(Integer, DiscoveredLocalization|nil)] the next index to
      #   scan and the entry found at +index+, if any
      def extract_ios_entry(lines, file, index, line)
        if (entry = extract_ios_single_line_entry(file, index, line))
          return [index + 1, entry]
        end

        return extract_ios_multiline_entry(lines, file, index) if IOS_FUNCTION_OPENERS.any? { |opener| opener.match?(line) }

        [index + 1, nil]
      end

      # Builds an entry from the first single-line pattern matching +line+.
      def extract_ios_single_line_entry(file, index, line)
        pattern = IOS_SINGLE_LINE_DISCOVERY_PATTERNS.find { |candidate| candidate.match?(line) }
        return unless pattern

        build_ios_discovered_entry(file, index, pattern.match(line))
      end

      # Joins the opener line with up to +lookahead+ following lines and matches
      # the multi-line patterns against that snippet. The entry is attributed to
      # the line containing the quoted key, falling back to the opener line.
      def extract_ios_multiline_entry(lines, file, start_index, lookahead: 8)
        end_index = [lines.length - 1, start_index + lookahead].min
        snippet = lines[start_index..end_index].join("\n")
        pattern = IOS_MULTILINE_DISCOVERY_PATTERNS.find { |candidate| candidate.match?(snippet) }
        return [start_index + 1, nil] unless pattern

        match = pattern.match(snippet)
        key_line_index = locate_key_line(lines, start_index, end_index, match[:key])

        [
          (key_line_index || start_index) + 1,
          build_ios_discovered_entry(file, key_line_index || start_index, match)
        ]
      end

      # Converts a pattern match into a DiscoveredLocalization with a 1-based
      # line number; returns nil when the key capture is missing or empty.
      def build_ios_discovered_entry(file, index, match)
        key = match[:key]
        return if key.nil? || key.empty?

        text = match.names.include?('text') ? match[:text] : nil
        comment = match.names.include?('comment') ? unescape_source_string(match[:comment]) : nil
        text = unescape_source_string(text) if text

        DiscoveredLocalization.new(
          key: key,
          file: file,
          line: index + 1,
          text: text,
          comment: comment
        )
      end

      # Index of the first line in the range containing the quoted key. Both
      # quote styles accepted by the discovery patterns are checked.
      def locate_key_line(lines, start_index, end_index, key)
        quoted_forms = ["\"#{key}\"", "'#{key}'"]

        (start_index..end_index).find do |index|
          quoted_forms.any? { |quoted| lines[index].include?(quoted) }
        end
      end

      # Collects every string-resource reference on every line of the file.
      def discover_android_entries(file)
        lines = cached_file_lines(file)
        entries = []

        lines.each_with_index do |line, index|
          entries.concat(extract_android_entries_from_line(file, index, line))
        end

        entries
      end

      # One entry per resource reference on the line (there may be several).
      def extract_android_entries_from_line(file, index, line)
        line.scan(ANDROID_DISCOVERY_PATTERN).filter_map do |captures|
          key = captures.compact.first
          next if key.nil? || key.empty?

          DiscoveredLocalization.new(
            key: key,
            file: file,
            line: index + 1
          )
        end
      end

      # Resolves the backslash escapes listed in SOURCE_STRING_ESCAPES.
      #
      # The text is processed in a single left-to-right pass so that the output
      # of one replacement is never re-read as the start of another escape: an
      # escaped backslash followed by "n" stays a backslash and an "n" rather
      # than collapsing into a newline. Unlisted escapes are left untouched.
      #
      # @param text [String, nil]
      # @return [String, nil] nil when +text+ is nil
      def unescape_source_string(text)
        return if text.nil?

        text.gsub(/\\(["'\\nt])/) { SOURCE_STRING_ESCAPES.fetch(Regexp.last_match(1)) }
      end
    end
  end
end
@@ -1,10 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'find'
4
+ require_relative 'searcher/source_discovery'
4
5
 
5
6
  module I18nContextGenerator
6
7
  # Finds where translation keys are used in iOS and Android source code.
7
8
  class Searcher
9
+ include SourceDiscovery
10
+
8
11
  # Represents a code match with surrounding context
9
12
  Match = Data.define(:file, :line, :match_line, :context, :enclosing_scope) do
10
13
  def initialize(file:, line:, match_line: '', context: '', enclosing_scope: nil)
@@ -12,6 +15,13 @@ module I18nContextGenerator
12
15
  end
13
16
  end
14
17
 
18
# Immutable record of one localization usage found directly in source code:
# its key, the file and line where it was found, and the optional text and
# comment captured alongside it (both default to nil).
DiscoveredLocalization = Data.define(:key, :file, :line, :text, :comment) do
  def initialize(key:, file:, line:, text: nil, comment: nil)
    super(key: key, file: file, line: line, text: text, comment: comment)
  end
end
24
+
15
25
  # Patterns that indicate false positive matches (not actual localization usage)
16
26
  FALSE_POSITIVE_PATTERNS = [
17
27
  /==\s*["']/, # String comparisons like == "yes"
@@ -367,6 +377,8 @@ module I18nContextGenerator
367
377
  "String\\s*\\(\\s*localized:\\s*[\"']#{escaped}[\"']",
368
378
  # LocalizedStringKey("key") - SwiftUI
369
379
  "LocalizedStringKey\\s*\\(\\s*[\"']#{escaped}[\"']",
380
+ # Direct assignment to a LocalizedStringKey-typed value
381
+ "LocalizedStringKey\\s*=\\s*[\"']#{escaped}[\"']",
370
382
  # Text("key") - SwiftUI (when using localized strings)
371
383
  "Text\\s*\\(\\s*[\"']#{escaped}[\"']",
372
384
  # .localized extension pattern
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module I18nContextGenerator
4
- VERSION = '0.3.0'
4
+ VERSION = '0.4.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: i18n-context-generator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Automattic
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-23 00:00:00.000000000 Z
11
+ date: 2026-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -152,6 +152,8 @@ files:
152
152
  - lib/i18n_context_generator/cli.rb
153
153
  - lib/i18n_context_generator/config.rb
154
154
  - lib/i18n_context_generator/context_extractor.rb
155
+ - lib/i18n_context_generator/context_extractor/run_logging.rb
156
+ - lib/i18n_context_generator/context_extractor/source_filters.rb
155
157
  - lib/i18n_context_generator/git_diff.rb
156
158
  - lib/i18n_context_generator/llm/anthropic.rb
157
159
  - lib/i18n_context_generator/llm/client.rb
@@ -163,6 +165,7 @@ files:
163
165
  - lib/i18n_context_generator/parsers/yaml_parser.rb
164
166
  - lib/i18n_context_generator/platform_validator.rb
165
167
  - lib/i18n_context_generator/searcher.rb
168
+ - lib/i18n_context_generator/searcher/source_discovery.rb
166
169
  - lib/i18n_context_generator/version.rb
167
170
  - lib/i18n_context_generator/writers/android_xml_writer.rb
168
171
  - lib/i18n_context_generator/writers/csv_writer.rb