junit_timing_splitter 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c22c4ca7f4c88bf202a2210e0f0e0739a3526c253e4936c96da3eef4128cab86
4
+ data.tar.gz: f162822a717e86cefa18d4f6ececc44bb2db61d7d02d7490c089f795f35eb7af
5
+ SHA512:
6
+ metadata.gz: 69387862b8500f9ee3a1a4b5e944b9f2902c89d93c63125c8eaae38994ba8527fb10327df1888a4ea8c9d51d2d948ea0a2afca1f58bcbd3867a22aad097ec624
7
+ data.tar.gz: 68e59f1bf1c3039f4a6fdd19d67b2a5b4a970262035bccb38a15f639345ca4ef7531a6db401760db5bd8a37122b429655ad9f44f0e1282d075de75c527d073f0
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Executable entry point: loads the library and hands the command-line
# arguments to the Thor-based CLI.
require 'junit_timing_splitter'

JunitTimingSplitter::Cli.start(ARGV)
@@ -0,0 +1,21 @@
1
module JunitTimingSplitter
  # One group of test files together with the time total accumulated for them.
  class Bucket
    attr_accessor :files, :total_time

    # @param files [Array<String>] test file paths placed in this bucket
    # @param total_time [Float] running time total for +files+
    def initialize(files: [], total_time: 0.0)
      @total_time = total_time
      @files = files
    end

    # @return [Hash] the bucket's files and total time under the keys
    #   +:files+ and +:total_time+
    def to_h
      { files: files, total_time: total_time }
    end

    # @return [String] the file paths joined by single spaces
    def to_s
      @files.join(' ')
    end
  end
end
@@ -0,0 +1,84 @@
1
require 'fileutils'

# The CLI to split testcases into n buckets and to read, audit, or amend the
# JSON schema file that records those buckets.
module JunitTimingSplitter
  class Cli < Thor
    desc 'split', 'Split test files by timing'
    option :files, required: true, aliases: '-f', desc: 'Glob path to scan for test files'
    option :buckets, required: true, aliases: '-b', type: :numeric, desc: 'Number of buckets'
    option :schema, required: true, aliases: '-o', desc: 'Output JSON file that contains information for each bucket'
    # Parses the reports matched by --files, distributes the test files over
    # --buckets buckets and writes the buckets to --schema as JSON, creating
    # the parent directory when needed.
    def split
      parsed_timings = JunitTimingSplitter::Parser.new(options[:files]).execute
      buckets = JunitTimingSplitter::Splitter.new(parsed_timings, options[:buckets]).execute
      FileUtils.mkdir_p(File.dirname(options[:schema]))
      write_schema(buckets)
      puts "Buckets written to #{options[:schema]}"
    end

    desc 'show', 'Show test files of a specific bucket from JSON file'
    option :schema, required: true, aliases: '-s', desc: 'Specific Generated JSON file from split step'
    option :bucket, required: true, aliases: '-i', type: :numeric, desc: 'Bucket number to read'
    # Prints the files of bucket --bucket (0-based) separated by spaces.
    # Prints 'Bucket not found' and exits with status 1 when the bucket is
    # absent or holds no files.
    def show
      schema = JunitTimingSplitter::Schema.new(options[:schema])
      index = options[:bucket].to_i
      # A negative index would otherwise read from the end of the bucket list.
      files = index.negative? ? [] : Array(schema.files_for_bucket(index))

      abort_with('Bucket not found') if files.empty?
      puts files.join(' ')
    rescue IOError, JSON::ParserError => e
      abort_with(e.message)
    end

    desc 'scan', 'Scan folder or glob path for missing test files'
    option :schema, required: true, aliases: '-s', desc: 'Specific Generated JSON file from split step'
    option :files, required: true, aliases: '-f', desc: 'Glob path to scan for test files'
    # Lists the files matched by --files that no bucket in --schema contains.
    def scan
      schema = JunitTimingSplitter::Schema.new(options[:schema])
      missing_files = schema.scan_missing_files(options[:files])

      if missing_files.empty?
        puts 'No missing test files detected.'
      else
        puts 'Missing test files:'
        missing_files.each { |file| puts file }
      end
    rescue IOError, JSON::ParserError => e
      abort_with(e.message)
    end

    desc 'merge', 'Merge missing test files into buckets'
    option :schema, required: true, aliases: '-s', desc: 'Specific Generated JSON file from split step'
    option :files, required: true, aliases: '-f', desc: 'Glob path to scan for test files'
    # Adds the files matched by --files that are not yet in --schema to the
    # buckets already stored there and rewrites the schema file.
    def merge
      schema = JunitTimingSplitter::Schema.new(options[:schema])
      missing_files = schema.scan_missing_files(options[:files])

      if missing_files.empty?
        puts 'No missing test files to merge.'
      else
        splitter = JunitTimingSplitter::Splitter.new([], schema.buckets.size)
        # The splitter starts with empty buckets; load the stored contents
        # first so rewriting the schema does not drop the files already there.
        seed_buckets(splitter.buckets, schema.buckets)
        write_schema(splitter.merge_missing_files(missing_files))
        puts "Missing files merged into buckets and written to #{options[:schema]}"
      end
    rescue IOError, JSON::ParserError => e
      abort_with(e.message)
    end

    def self.exit_on_failure?
      true
    end

    private

    # Serializes +buckets+ (objects responding to #to_h) as pretty-printed
    # JSON into the file named by the --schema option.
    def write_schema(buckets)
      File.write(options[:schema], JSON.pretty_generate(buckets.map(&:to_h)))
    end

    # Copies the files and total time of each stored bucket hash into the
    # bucket object at the same position.
    def seed_buckets(buckets, stored_buckets)
      buckets.zip(stored_buckets) do |bucket, stored|
        bucket.files.concat(Array(stored['files']))
        bucket.total_time += stored['total_time'].to_f
      end
    end

    # Prints +message+ and terminates the process with exit status 1.
    def abort_with(message)
      puts message
      exit(1)
    end
  end
end
@@ -0,0 +1,11 @@
1
module JunitTimingSplitter
  # Timing record for a single test file: the file, its summed time and the
  # number of testcases counted for it.
  class ParsedTiming
    attr_accessor :file, :total_time, :total_testcases

    # @param file [String] the test file this record describes
    # @param total_time [Float] time total recorded for the file
    # @param total_testcases [Integer] number of testcases counted, 1 by default
    def initialize(file:, total_time:, total_testcases: 1)
      @file, @total_time, @total_testcases = file, total_time, total_testcases
    end
  end
end
@@ -0,0 +1,39 @@
1
module JunitTimingSplitter
  # Reads XML test reports and sums, per test file, the time and the number of
  # the <testcase> elements that reference that file.
  class Parser
    attr_reader :file_paths, :files, :parsed_timings

    # Expands the glob and prints the report files that were found.
    #
    # @param file_paths [String, Array<String>] glob pattern(s) passed to Dir.glob
    def initialize(file_paths)
      @file_paths = file_paths
      @files = Dir.glob(file_paths)
      @parsed_timings = []

      puts "Detected #{files.size} files"
      @files.each { |file| puts "Detected file: #{file}" }
    end

    # Parse multiple rspec-results.xml files into a list of files and their execution times.
    #
    # Testcases without a +file+ attribute are skipped; a missing +time+
    # attribute counts as 0.0 because nil.to_f is 0.0.
    #
    # @return [Array<ParsedTiming>] one entry per distinct test file, in order
    #   of first appearance
    def execute
      # Look up entries by file through a hash instead of scanning the array
      # for every testcase. It is seeded from the current array so that a
      # repeated call keeps adding to the existing entries, as before.
      timings_by_file = @parsed_timings.to_h { |timing| [timing.file, timing] }

      files.each do |report_path|
        File.open(report_path) do |report|
          Nokogiri::XML(report).xpath('//testcase').each do |testcase|
            test_file = testcase['file']
            next if test_file.nil?

            time = testcase['time'].to_f
            timing = timings_by_file[test_file]

            if timing
              timing.total_time += time
              timing.total_testcases += 1
            else
              timing = ParsedTiming.new(file: test_file, total_time: time)
              timings_by_file[test_file] = timing
              @parsed_timings << timing
            end
          end
        end
      end

      @parsed_timings
    end
  end
end
@@ -0,0 +1,37 @@
1
module JunitTimingSplitter
  # Read-only view of a schema file: the parsed JSON is exposed as +buckets+,
  # and each bucket is read through its 'files' key.
  class Schema
    attr_reader :path, :buckets

    # @param path [String] location of the JSON schema file
    # @raise [IOError] when +path+ is not an existing regular file
    # @raise [JSON::ParserError] when the file content is not valid JSON
    def initialize(path)
      @path = path
      validate_file
      @buckets = JSON.parse(File.read(path))
    end

    # Retrieve files from a specific bucket
    #
    # @param bucket_number [Integer] 0-based bucket position
    # @return [Array<String>] the bucket's files; empty when the position is
    #   negative, out of range, or the bucket lists no files
    def files_for_bucket(bucket_number)
      # Array#[] would resolve a negative index from the end of the list.
      return [] if bucket_number.negative?

      bucket = buckets[bucket_number]
      bucket ? Array(bucket['files']) : []
    end

    # Retrieve all parsed files across all buckets
    #
    # @return [Array<String>] files of every bucket, in bucket order
    def all_parsed_files
      buckets.flat_map { |bucket| Array(bucket['files']) }
    end

    # Scan for missing files in a specified glob path
    #
    # @param glob_path [String] glob pattern passed to Dir.glob
    # @return [Array<String>] expanded paths matched by the glob that no
    #   bucket contains
    def scan_missing_files(glob_path)
      parsed_files = all_parsed_files.map { |file| File.expand_path(file) }
      all_files = Dir.glob(glob_path).map { |file| File.expand_path(file) }
      all_files - parsed_files
    end

    private

    # File.file? rather than File.exist?: a directory exists but cannot be
    # read as a schema, and File.read would fail with Errno::EISDIR instead of
    # the IOError raised here.
    def validate_file
      return if File.file?(path)

      raise IOError, "Schema file not found: #{path}"
    end
  end
end
@@ -0,0 +1,59 @@
1
module JunitTimingSplitter
  # Distributes test files over a fixed number of buckets.
  class Splitter
    # Time added to a bucket for a file that has no recorded timing.
    DEFAULT_MISSING_FILE_TIME = 1.0

    attr_reader :parsed_timings, :total_splits, :buckets

    # @param parsed_timings [Array] objects responding to #file and #total_time
    # @param total_splits [Integer] number of buckets to create
    def initialize(parsed_timings, total_splits)
      @parsed_timings = parsed_timings
      @total_splits = total_splits
      @buckets = Array.new(total_splits) { Bucket.new }
    end

    # Split the parsed timings into buckets based on total_splits.
    #
    # Buckets are rebuilt from scratch on every call. Timings are taken in
    # descending order of total_time and each file goes to the bucket whose
    # running total is currently the smallest.
    #
    # @return [Array<Bucket>] the filled buckets
    # @raise [ArgumentError] when there are timings but no buckets
    def execute
      @buckets = Array.new(total_splits) { Bucket.new }
      ensure_buckets_for(parsed_timings)

      parsed_timings.sort_by { |timing| -timing.total_time }.each do |timing|
        lightest = @buckets.min_by(&:total_time)
        lightest.files << timing.file
        lightest.total_time += timing.total_time
      end

      @buckets
    end

    # Appends files without timing data to the current buckets in round-robin
    # order, starting at the first bucket.
    #
    # @param missing_files [Array<String>] files to add
    # @param default_time [Float] time added to a bucket per file
    # @return [Array<Bucket>] the updated buckets
    # @raise [ArgumentError] when there are files but no buckets
    def merge_missing_files(missing_files, default_time: DEFAULT_MISSING_FILE_TIME)
      ensure_buckets_for(missing_files)

      missing_files.each_with_index do |file, index|
        bucket = @buckets[index % @buckets.size]
        bucket.files << file
        bucket.total_time += default_time
      end

      @buckets
    end

    # Command to display a specific split, start with index 0.
    #
    # NOTE: this replaces Object#inspect; it prints to standard output and
    # returns nil instead of returning a description string.
    #
    # @param split_number [Integer, nil] bucket to print; every bucket when nil
    # @return [nil]
    def inspect(split_number: nil)
      unless split_number
        buckets.each_with_index { |bucket, index| puts bucket_line(bucket, index) }
        return
      end

      # Reject negative numbers too: Array#[] would count them from the end.
      if split_number.negative? || split_number > total_splits - 1
        puts "Invalid split number. Total splits: #{total_splits}"
        return
      end

      puts bucket_line(buckets[split_number], split_number)
    end

    private

    # Fails with a clear error instead of a NoMethodError on nil or a
    # ZeroDivisionError when items would have to go into zero buckets.
    def ensure_buckets_for(items)
      return unless @buckets.empty? && !items.empty?

      raise ArgumentError, "Cannot distribute files into #{total_splits} buckets"
    end

    # One-line summary of a bucket: position, rounded total time and files.
    def bucket_line(bucket, index)
      "[BUCKET #{index} - #{bucket.total_time.round(2)}s] #{bucket.files.join(', ')}"
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'nokogiri'
2
+ require "zeitwerk"
3
+ require 'thor'
4
+ require 'json'
5
+
6
+ # Example Usage:
7
+ # To analyze all XML files:
8
+ # parsed_timings = JunitTimingSplitter::Parser.new('results_*.xml').execute
9
+ # buckets = JunitTimingSplitter::Splitter.new(parsed_timings, 5).execute
10
module JunitTimingSplitter
  # Build the Zeitwerk loader for this gem and activate it right away.
  Zeitwerk::Loader.for_gem.setup
end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: junit_timing_splitter
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Kim Yu Ng
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.15'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2.0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '1.15'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.0'
33
+ - !ruby/object:Gem::Dependency
34
+ name: thor
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: json
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.1'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 2.1.0
57
+ type: :runtime
58
+ prerelease: false
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: '2.1'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.1.0
67
+ - !ruby/object:Gem::Dependency
68
+ name: zeitwerk
69
+ requirement: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '2.4'
74
+ - - "<"
75
+ - !ruby/object:Gem::Version
76
+ version: '3.0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '2.4'
84
+ - - "<"
85
+ - !ruby/object:Gem::Version
86
+ version: '3.0'
87
+ description: A tool to optimize parallel test execution by analyzing JUnit XML results
88
+ and distributing test files across buckets.
89
+ email: kimyu92@gmail.com
90
+ executables:
91
+ - junit_timing_splitter
92
+ extensions: []
93
+ extra_rdoc_files: []
94
+ files:
95
+ - bin/junit_timing_splitter
96
+ - lib/junit_timing_splitter.rb
97
+ - lib/junit_timing_splitter/bucket.rb
98
+ - lib/junit_timing_splitter/cli.rb
99
+ - lib/junit_timing_splitter/parsed_timing.rb
100
+ - lib/junit_timing_splitter/parser.rb
101
+ - lib/junit_timing_splitter/schema.rb
102
+ - lib/junit_timing_splitter/splitter.rb
103
+ homepage: https://github.com/kimyu92/junit_timing_splitter
104
+ licenses:
105
+ - MIT
106
+ metadata: {}
107
+ post_install_message:
108
+ rdoc_options: []
109
+ require_paths:
110
+ - lib
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '3.1'
116
+ - - "<"
117
+ - !ruby/object:Gem::Version
118
+ version: '4.0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ requirements: []
125
+ rubygems_version: 3.5.22
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Split test files into evenly distributed buckets based on execution time
129
+ test_files: []