ruby-maat 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.commitlintrc.json +44 -0
  3. data/.mailmap +3 -0
  4. data/.overcommit.yml +77 -0
  5. data/.release-please-config.json +33 -0
  6. data/.release-please-manifest.json +3 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +48 -0
  9. data/CHANGELOG.md +46 -0
  10. data/CI_CD_SETUP.md +180 -0
  11. data/CLAUDE.md +130 -0
  12. data/Dockerfile +40 -0
  13. data/README.md +444 -0
  14. data/README_RUBY.md +300 -0
  15. data/RELEASE_PLEASE_SETUP.md +198 -0
  16. data/RUBY_MAAT.md +227 -0
  17. data/Rakefile +12 -0
  18. data/doc/imgs/abs_churn_sample.png +0 -0
  19. data/doc/imgs/code_age_sample.png +0 -0
  20. data/doc/imgs/coupling_sample.png +0 -0
  21. data/doc/imgs/crime_cover.jpg +0 -0
  22. data/doc/imgs/tree_map_sample.png +0 -0
  23. data/doc/intro.md +3 -0
  24. data/exe/ruby-maat +6 -0
  25. data/lib/ruby_maat/analysis/authors.rb +47 -0
  26. data/lib/ruby_maat/analysis/base_analysis.rb +70 -0
  27. data/lib/ruby_maat/analysis/churn.rb +255 -0
  28. data/lib/ruby_maat/analysis/code_age.rb +53 -0
  29. data/lib/ruby_maat/analysis/commit_messages.rb +58 -0
  30. data/lib/ruby_maat/analysis/communication.rb +56 -0
  31. data/lib/ruby_maat/analysis/effort.rb +150 -0
  32. data/lib/ruby_maat/analysis/entities.rb +40 -0
  33. data/lib/ruby_maat/analysis/identity.rb +12 -0
  34. data/lib/ruby_maat/analysis/logical_coupling.rb +134 -0
  35. data/lib/ruby_maat/analysis/sum_of_coupling.rb +43 -0
  36. data/lib/ruby_maat/analysis/summary.rb +43 -0
  37. data/lib/ruby_maat/app.rb +143 -0
  38. data/lib/ruby_maat/change_record.rb +47 -0
  39. data/lib/ruby_maat/cli.rb +187 -0
  40. data/lib/ruby_maat/dataset.rb +205 -0
  41. data/lib/ruby_maat/groupers/layer_grouper.rb +67 -0
  42. data/lib/ruby_maat/groupers/team_mapper.rb +51 -0
  43. data/lib/ruby_maat/groupers/time_grouper.rb +70 -0
  44. data/lib/ruby_maat/output/csv_output.rb +65 -0
  45. data/lib/ruby_maat/parsers/base_parser.rb +63 -0
  46. data/lib/ruby_maat/parsers/git2_parser.rb +72 -0
  47. data/lib/ruby_maat/parsers/git_parser.rb +66 -0
  48. data/lib/ruby_maat/parsers/mercurial_parser.rb +64 -0
  49. data/lib/ruby_maat/parsers/perforce_parser.rb +77 -0
  50. data/lib/ruby_maat/parsers/svn_parser.rb +76 -0
  51. data/lib/ruby_maat/parsers/tfs_parser.rb +103 -0
  52. data/lib/ruby_maat/version.rb +5 -0
  53. data/lib/ruby_maat.rb +44 -0
  54. metadata +143 -0
@@ -0,0 +1,187 @@
# frozen_string_literal: true

require "optparse"
require "date"

module RubyMaat
  # Command Line Interface - Ruby port of code-maat.cmd-line
  #
  # Responsibilities:
  #   * declare and parse every supported command-line switch into @options
  #   * validate that the required options (-l and -c) were supplied
  #   * fill in the documented defaults for the analysis thresholds
  #   * hand the finished options hash to RubyMaat::App
  class CLI
    VERSION_INFO = "Ruby Maat version #{RubyMaat::VERSION} - A Ruby port of Code Maat".freeze

    def initialize
      @options = {}
      @parser = build_option_parser
    end

    # Parses +args+ (destructively), runs the requested analysis, and exits
    # non-zero on failure. User errors (bad switches, missing required
    # options, malformed dates) print the usage text; unexpected errors print
    # a backtrace only when --verbose was given.
    def run(args)
      @parser.parse!(args)

      if @options[:help]
        puts usage
        exit 0
      end

      validate_required_options!

      app = App.new(@options)
      app.run
    rescue OptionParser::ParseError, ArgumentError => e
      # OptionParser::ParseError is the common ancestor of InvalidOption,
      # MissingArgument, AmbiguousOption, etc., so every parser failure is
      # reported uniformly. ArgumentError comes from our own validation.
      warn "Error: #{e.message}"
      warn usage
      exit 1
    rescue => e
      warn "Error: #{e.message}"
      warn e.backtrace.join("\n") if @options[:verbose]
      exit 1
    end

    private

    # Declares every CLI switch and wires it to the corresponding
    # @options key. Returns the configured OptionParser.
    def build_option_parser
      OptionParser.new do |opts|
        opts.banner = usage_banner

        # Required options
        opts.on("-l", "--log LOG", "Log file with input data") do |log|
          @options[:log] = log
        end

        opts.on("-c", "--version-control VCS",
          "Input vcs module type: supports svn, git, git2, hg, p4, or tfs") do |vcs|
          @options[:version_control] = vcs
        end

        # Analysis selection
        opts.on("-a", "--analysis ANALYSIS",
          "The analysis to run (#{App.analysis_names})",
          "(default: authors)") do |analysis|
          @options[:analysis] = analysis
        end

        # Input/Output options
        opts.on("--input-encoding ENCODING",
          "Specify an encoding other than UTF-8 for the log file") do |encoding|
          @options[:input_encoding] = encoding
        end

        opts.on("-r", "--rows ROWS", Integer, "Max rows in output") do |rows|
          @options[:rows] = rows
        end

        opts.on("-o", "--outfile OUTFILE", "Write the result to the given file name") do |outfile|
          @options[:outfile] = outfile
        end

        # Grouping and mapping options
        opts.on("-g", "--group GROUP",
          "A file with a pre-defined set of layers. Data will be aggregated according to the group of layers.") do |group|
          @options[:group] = group
        end

        opts.on("-p", "--team-map-file TEAM_MAP_FILE",
          "A CSV file with author,team that translates individuals into teams.") do |team_map|
          @options[:team_map_file] = team_map
        end

        # Analysis threshold options
        opts.on("-n", "--min-revs MIN_REVS", Integer,
          "Minimum number of revisions to include an entity in the analysis (default: 5)") do |min_revs|
          @options[:min_revs] = min_revs
        end

        opts.on("-m", "--min-shared-revs MIN_SHARED_REVS", Integer,
          "Minimum number of shared revisions to include an entity in the analysis (default: 5)") do |min_shared|
          @options[:min_shared_revs] = min_shared
        end

        opts.on("-i", "--min-coupling MIN_COUPLING", Integer,
          "Minimum degree of coupling (in percentage) to consider (default: 30)") do |min_coupling|
          @options[:min_coupling] = min_coupling
        end

        opts.on("-x", "--max-coupling MAX_COUPLING", Integer,
          "Maximum degree of coupling (in percentage) to consider (default: 100)") do |max_coupling|
          @options[:max_coupling] = max_coupling
        end

        opts.on("-s", "--max-changeset-size MAX_CHANGESET_SIZE", Integer,
          "Maximum number of modules in a change set if it shall be included in a coupling analysis (default: 30)") do |max_size|
          @options[:max_changeset_size] = max_size
        end

        # Analysis-specific options
        opts.on("-e", "--expression-to-match MATCH_EXPRESSION",
          "A regex to match against commit messages. Used with -messages analyses") do |expression|
          @options[:expression_to_match] = expression
        end

        opts.on("-t", "--temporal-period TEMPORAL_PERIOD",
          "Used for coupling analyses. Instructs Ruby Maat to consider all commits during the rolling temporal period as a single, logical commit set") do |period|
          @options[:temporal_period] = period
        end

        opts.on("-d", "--age-time-now AGE_TIME_NOW",
          "Specify a date as YYYY-MM-dd that counts as time zero when doing a code age analysis") do |date_str|
          @options[:age_time_now] = Date.parse(date_str)
        rescue Date::Error
          # Re-raise as ArgumentError so run's user-error handler prints usage.
          raise ArgumentError, "Invalid date format for --age-time-now: #{date_str}. Use YYYY-MM-dd format."
        end

        opts.on("--verbose-results",
          "Includes additional analysis details together with the results. Only implemented for change coupling.") do
          @options[:verbose_results] = true
        end

        # Help and version
        opts.on("-h", "--help", "Show this help message") do
          @options[:help] = true
        end

        opts.on("--version", "Show version information") do
          puts VERSION_INFO
          exit 0
        end

        opts.on("--verbose", "Enable verbose error output") do
          @options[:verbose] = true
        end
      end
    end

    # Banner printed ahead of the generated option help.
    def usage_banner
      <<~BANNER
        #{VERSION_INFO}

        This is Ruby Maat, a Ruby port of Code Maat - a program used to collect statistics from a VCS.

        Usage: ruby-maat -l log-file -c vcs-type [options]

        Options:
      BANNER
    end

    # Full help text (banner + option summaries) as produced by OptionParser.
    def usage
      @parser.help
    end

    # Raises ArgumentError unless -l/--log and -c/--version-control were
    # given, then applies the documented defaults for any threshold the
    # user did not override.
    def validate_required_options!
      missing = []
      missing << "log file (-l/--log)" unless @options[:log]
      missing << "version control system (-c/--version-control)" unless @options[:version_control]

      raise ArgumentError, "Missing required options: #{missing.join(", ")}" unless missing.empty?

      # Set defaults
      @options[:analysis] ||= "authors"
      @options[:min_revs] ||= 5
      @options[:min_shared_revs] ||= 5
      @options[:min_coupling] ||= 30
      @options[:max_coupling] ||= 100
      @options[:max_changeset_size] ||= 30
    end
  end
end
@@ -0,0 +1,205 @@
# frozen_string_literal: true

require "rover"
require "set"

module RubyMaat
  # Wrapper around Rover DataFrame to provide domain-specific operations.
  # This replaces Incanter datasets from the Clojure version.
  #
  # The frame is built with string column names ("entity", "author", "date",
  # "revision", "message", "loc_added", "loc_deleted") — see build_dataframe —
  # so row-level access throughout this class uses string keys.
  class Dataset
    def initialize(change_records = [])
      @data = build_dataframe(change_records)
    end

    # Convenience constructor mirroring the Clojure API.
    def self.from_changes(change_records)
      new(change_records)
    end

    # The underlying Rover::DataFrame.
    def to_df
      @data
    end

    # Group by entity and count distinct authors
    def group_by_entity_count_authors
      @data.group(:entity).count(:author, name: "n_authors")
    end

    # Group by entity and count revisions
    def group_by_entity_count_revisions
      @data.group(:entity).count(:revision, name: "n_revs")
    end

    # Group by author and sum churn metrics
    def group_by_author_sum_churn
      @data.group(:author).sum(%i[loc_added loc_deleted])
    end

    # Group by entity and sum churn metrics
    def group_by_entity_sum_churn
      @data.group(:entity).sum(%i[loc_added loc_deleted])
    end

    # All distinct entities (files). Returns [] for an empty dataset.
    def entities
      return [] if @data.empty?

      @data[:entity].uniq
    end

    # All distinct authors. Returns [] for an empty dataset.
    def authors
      return [] if @data.empty?

      @data[:author].uniq
    end

    # Returns a new Dataset containing only entities that appear in at
    # least +min_revs+ distinct revisions.
    def filter_min_revisions(min_revs)
      # Count distinct revisions per entity.
      entity_revision_counts = {}
      @data.to_a.each do |row|
        entity = row["entity"]
        revision = row["revision"]
        entity_revision_counts[entity] ||= Set.new
        entity_revision_counts[entity] << revision
      end

      # Keep only the entities with enough revisions.
      entities_to_keep = entity_revision_counts.select { |_, revisions| revisions.size >= min_revs }.keys

      filtered_rows = @data.to_a.select { |row| entities_to_keep.include?(row["entity"]) }
      Dataset.from_changes(rows_to_records(filtered_rows))
    end

    # All unordered entity pairs that changed together in at least one
    # revision. A pair is emitted once per shared revision.
    def coupling_pairs
      # Group by revision to find entities that changed together.
      revision_entities = {}

      @data.to_a.each do |row|
        revision = row["revision"]
        entity = row["entity"]

        revision_entities[revision] ||= []
        revision_entities[revision] << entity unless revision_entities[revision].include?(entity)
      end

      pairs = []
      revision_entities.each_value do |entities|
        entities.combination(2) do |entity1, entity2|
          pairs << [entity1, entity2]
        end
      end

      pairs
    end

    # Number of revisions in which both entities were changed.
    def shared_revisions_count(entity1, entity2)
      entity1_revs = Set.new
      entity2_revs = Set.new

      @data.to_a.each do |row|
        if row["entity"] == entity1
          entity1_revs << row["revision"]
        elsif row["entity"] == entity2
          entity2_revs << row["revision"]
        end
      end

      (entity1_revs & entity2_revs).size
    end

    # Number of distinct revisions that touched +entity+.
    def revision_count(entity)
      revisions = Set.new
      @data.to_a.each do |row|
        revisions << row["revision"] if row["entity"] == entity
      end
      revisions.size
    end

    # Sorted distinct dates. Returns [] for an empty dataset
    # (an empty frame has no :date column to index into).
    def unique_dates
      return [] if @data.empty?

      @data[:date].uniq.sort
    end

    # Returns a new Dataset restricted to records whose date lies within
    # [start_date, end_date] (inclusive).
    def filter_date_range(start_date, end_date)
      # NOTE(review): rows are accessed with string keys, consistent with the
      # string column names used when the frame is built.
      filtered_rows = @data.to_a.select { |row| row["date"].between?(start_date, end_date) }
      Dataset.from_changes(rows_to_records(filtered_rows))
    end

    # Get latest date for each entity (for age analysis)
    def latest_date_by_entity
      @data.group(:entity).max(:date)
    end

    # Number of rows in the dataset.
    def size
      @data.count
    end

    def empty?
      @data.empty?
    end

    private

    # Rebuilds ChangeRecord objects from row hashes (string keys), shared by
    # the filtering operations above.
    def rows_to_records(rows)
      rows.map do |row|
        ChangeRecord.new(
          entity: row["entity"],
          author: row["author"],
          date: row["date"],
          revision: row["revision"],
          message: row["message"],
          loc_added: row["loc_added"],
          loc_deleted: row["loc_deleted"]
        )
      end
    end

    # Builds the column-oriented hash Rover expects. String keys are used
    # throughout; an empty record list yields an empty frame.
    def build_dataframe(change_records)
      return Rover::DataFrame.new if change_records.empty?

      data_hash = {
        "entity" => [],
        "author" => [],
        "date" => [],
        "revision" => [],
        "message" => [],
        "loc_added" => [],
        "loc_deleted" => []
      }

      change_records.each do |record|
        data_hash["entity"] << record.entity
        data_hash["author"] << record.author
        data_hash["date"] << record.date
        data_hash["revision"] << record.revision
        data_hash["message"] << record.message
        data_hash["loc_added"] << record.loc_added
        data_hash["loc_deleted"] << record.loc_deleted
      end

      Rover::DataFrame.new(data_hash)
    end
  end
end
@@ -0,0 +1,67 @@
# frozen_string_literal: true

module RubyMaat
  module Groupers
    # Layer grouper - maps individual files to architectural layers.
    #
    # The grouping file contains one "regex => layer" rule per line; blank
    # lines and lines starting with '#' are ignored. Entities matching a
    # rule's regex are renamed to that rule's layer; the first matching rule
    # wins, and unmatched entities keep their original name.
    class LayerGrouper
      def initialize(grouping_file)
        @grouping_file = grouping_file
        @patterns = load_grouping_patterns
      end

      # Returns new ChangeRecords whose entity names have been replaced by
      # their matching layer. All other fields are copied unchanged.
      def group(change_records)
        change_records.map { |record| relabeled(record) }
      end

      private

      # Copy of +record+ with the entity mapped onto its layer.
      def relabeled(record)
        ChangeRecord.new(
          entity: map_entity_to_layer(record.entity),
          author: record.author,
          date: record.date,
          revision: record.revision,
          message: record.message,
          loc_added: record.loc_added,
          loc_deleted: record.loc_deleted
        )
      end

      # Reads the grouping file into an array of {regex:, layer:} hashes.
      # Invalid regexes are reported on stderr and skipped; any I/O failure
      # is re-raised as ArgumentError.
      def load_grouping_patterns
        File.foreach(@grouping_file).each_with_object([]) do |raw_line, rules|
          line = raw_line.strip
          next if line.empty? || line.start_with?("#")
          next unless line.include?("=>")

          pattern_str, layer_name = line.split("=>", 2).map(&:strip)

          begin
            rules << {regex: Regexp.new(pattern_str), layer: layer_name}
          rescue RegexpError => e
            warn "Invalid regex pattern '#{pattern_str}': #{e.message}"
          end
        end
      rescue => e
        raise ArgumentError, "Failed to load grouping file #{@grouping_file}: #{e.message}"
      end

      # First matching layer, or the entity itself when no rule applies.
      def map_entity_to_layer(entity)
        match = @patterns.find { |rule| entity.match?(rule[:regex]) }
        match ? match[:layer] : entity
      end
    end
  end
end
@@ -0,0 +1,51 @@
# frozen_string_literal: true

require "csv"

module RubyMaat
  module Groupers
    # Team mapper - translates individual authors into team names.
    #
    # The mapping comes from a CSV file with an "author,team" header row;
    # authors without a mapping keep their individual name.
    class TeamMapper
      def initialize(team_map_file)
        @team_map_file = team_map_file
        @author_to_team = load_team_mapping
      end

      # Returns new ChangeRecords with the author replaced by their team
      # (or left unchanged when no mapping exists). All other fields are
      # copied as-is.
      def map(change_records)
        change_records.map do |record|
          ChangeRecord.new(
            entity: record.entity,
            author: @author_to_team.fetch(record.author, record.author),
            date: record.date,
            revision: record.revision,
            message: record.message,
            loc_added: record.loc_added,
            loc_deleted: record.loc_deleted
          )
        end
      end

      private

      # Parses the CSV into {author => team}. Falls back to positional
      # columns when the "author"/"team" headers are absent; rows missing
      # either value are skipped. I/O or parse failures are re-raised as
      # ArgumentError.
      def load_team_mapping
        CSV.foreach(@team_map_file, headers: true).each_with_object({}) do |row, mapping|
          author = row["author"] || row[0]
          team = row["team"] || row[1]
          mapping[author.strip] = team.strip if author && team
        end
      rescue => e
        raise ArgumentError, "Failed to load team mapping file #{@team_map_file}: #{e.message}"
      end
    end
  end
end
@@ -0,0 +1,70 @@
# frozen_string_literal: true

module RubyMaat
  module Groupers
    # Time grouper - aggregates commits within temporal periods.
    #
    # Currently aggregates per calendar day: all records sharing the same
    # (date, entity) pair are collapsed into a single ChangeRecord. The
    # temporal_period argument is stored for future period variants but does
    # not yet alter the grouping granularity.
    class TimeGrouper
      def initialize(temporal_period)
        @temporal_period = temporal_period
      end

      # Collapses +change_records+ into one aggregated record per
      # (date, entity) pair, preserving first-encounter order.
      def group(change_records)
        change_records
          .group_by { |record| [record.date, record.entity] }
          .map { |(date, entity), records| aggregate_records(records, date, entity) }
      end

      private

      # Folds a same-day, same-entity batch into a single ChangeRecord:
      # churn is summed, distinct messages are joined with "; ", and the
      # first record supplies the representative author and revision.
      def aggregate_records(records, date, entity)
        representative = records.first

        ChangeRecord.new(
          entity: entity,
          # First author stands in for the whole batch (could be improved).
          author: representative.author,
          date: date,
          # Likewise, the first revision is used as representative.
          revision: representative.revision,
          message: records.filter_map(&:message).uniq.join("; "),
          loc_added: records.sum { |r| r.loc_added || 0 },
          loc_deleted: records.sum { |r| r.loc_deleted || 0 }
        )
      end
    end
  end
end
@@ -0,0 +1,65 @@
# frozen_string_literal: true

require "csv"

module RubyMaat
  module Output
    # CSV output handler - formats and writes analysis results as CSV.
    #
    # Writes to the given file name when one is supplied, otherwise to
    # stdout. An optional max_rows caps the number of data rows emitted
    # (the header row is always written).
    class CsvOutput
      def initialize(output_file = nil, max_rows = nil)
        @output_file = output_file
        @max_rows = max_rows
      end

      # Serializes +dataframe+ as CSV to the configured destination.
      # File handles are closed via the block form; stdout is left open.
      def write(dataframe)
        if @output_file
          File.open(@output_file, "w") { |file| write_csv(dataframe, file) }
        else
          write_csv(dataframe, $stdout)
        end
      end

      private

      # Emits the header followed by up to @max_rows formatted data rows.
      # NOTE(review): assumes the dataframe exposes #keys, #empty? and
      # #each_row (rows indexable by column name) — confirm against the
      # dataframe wrapper in use.
      def write_csv(dataframe, stream)
        csv = CSV.new(stream)

        # Header first - even an empty dataframe keeps its column structure.
        columns = dataframe.keys
        csv << columns

        return if dataframe.empty?

        emitted = 0
        dataframe.each_row do |row|
          break if @max_rows && emitted >= @max_rows

          csv << columns.map { |column| format_value(row[column]) }
          emitted += 1
        end
      end

      # Normalizes a cell for CSV output: ISO dates, floats rounded to
      # three decimals, nil as empty string, everything else stringified.
      def format_value(value)
        return "" if value.nil?
        return value.strftime("%Y-%m-%d") if value.is_a?(Date)
        return value.round(3) if value.is_a?(Float)

        value.to_s
      end
    end
  end
end
@@ -0,0 +1,63 @@
# frozen_string_literal: true

require "date"

module RubyMaat
  module Parsers
    # Base class for all VCS parsers.
    #
    # Subclasses implement #parse_content; this class handles file
    # validation, encoded reading, date parsing, and uniform error
    # reporting. (require "date" was missing even though parse_date
    # relies on Date.parse / Date::Error.)
    class BaseParser
      def initialize(log_file, options = {})
        @log_file = log_file
        @options = options
        # Log files default to UTF-8 unless --input-encoding overrides it.
        @encoding = options[:input_encoding] || "UTF-8"
      end

      # Template method: validates the log file, reads it with the
      # configured encoding, and delegates to the subclass's
      # #parse_content. Any failure is funneled through
      # #handle_parse_error, which raises ArgumentError.
      def parse
        validate_file_exists!
        content = read_log_file
        parse_content(content)
      rescue => e
        handle_parse_error(e)
      end

      protected

      # Reads the whole log file; invalid byte sequences for the chosen
      # encoding are reported as an ArgumentError with a usage hint.
      def read_log_file
        File.read(@log_file, encoding: @encoding)
      rescue Encoding::InvalidByteSequenceError
        raise ArgumentError, "Invalid encoding for log file. Try specifying --input-encoding"
      end

      # Raises ArgumentError when the log file path does not exist.
      def validate_file_exists!
        return if File.exist?(@log_file)

        raise ArgumentError, "Log file not found: #{@log_file}"
      end

      # Subclass hook: turn raw log text into change records.
      def parse_content(content)
        raise NotImplementedError, "Subclasses must implement parse_content"
      end

      # Re-raises ArgumentErrors untouched; wraps anything else in an
      # ArgumentError naming the VCS (derived from the subclass name) so
      # the CLI presents a consistent user-facing message.
      def handle_parse_error(error)
        case error
        when ArgumentError
          raise error
        else
          vcs_name = self.class.name.split("::").last.gsub("Parser", "")
          raise ArgumentError, "#{vcs_name}: Failed to parse the given file - is it a valid logfile? (#{error.message})"
        end
      end

      # Parses a date string, converting Date::Error into ArgumentError
      # so it flows through the standard user-error reporting.
      def parse_date(date_str)
        Date.parse(date_str)
      rescue Date::Error
        raise ArgumentError, "Invalid date format: #{date_str}"
      end

      # Helper to clean up binary file indicators and handle edge cases:
      # nil/empty/"-" (binary churn marker) become nil, otherwise the
      # numstat value is coerced to an integer.
      def clean_numstat(value)
        return nil if value.nil? || value.empty? || value == "-"

        value.to_i
      end
    end
  end
end