junit_timing_splitter 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c22c4ca7f4c88bf202a2210e0f0e0739a3526c253e4936c96da3eef4128cab86
4
+ data.tar.gz: f162822a717e86cefa18d4f6ececc44bb2db61d7d02d7490c089f795f35eb7af
5
+ SHA512:
6
+ metadata.gz: 69387862b8500f9ee3a1a4b5e944b9f2902c89d93c63125c8eaae38994ba8527fb10327df1888a4ea8c9d51d2d948ea0a2afca1f58bcbd3867a22aad097ec624
7
+ data.tar.gz: 68e59f1bf1c3039f4a6fdd19d67b2a5b4a970262035bccb38a15f639345ca4ef7531a6db401760db5bd8a37122b429655ad9f44f0e1282d075de75c527d073f0
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Executable entry point: loads the gem and hands the command-line arguments
# over to the Thor-based CLI.
require 'junit_timing_splitter'

JunitTimingSplitter::Cli.start(ARGV)
@@ -0,0 +1,21 @@
1
module JunitTimingSplitter
  # A group of test files together with the summed runtime assigned to them.
  class Bucket
    attr_accessor :files, :total_time

    # @param files [Array<String>] test file paths held by this bucket
    # @param total_time [Float] accumulated runtime of those files
    def initialize(files: [], total_time: 0.0)
      self.files = files
      self.total_time = total_time
    end

    # @return [Hash] the bucket as a plain hash with :files and :total_time keys
    def to_h
      { files: files, total_time: total_time }
    end

    # @return [String] the file paths separated by single spaces
    def to_s
      @files.join(' ')
    end
  end
end
@@ -0,0 +1,84 @@
1
# The CLI to split testcases into n buckets and to read, audit or amend the
# generated bucket schema.
module JunitTimingSplitter
  class Cli < Thor
    desc 'split', 'Split test files by timing'
    option :files, required: true, aliases: '-f', desc: 'Glob path to scan for test files'
    option :buckets, required: true, aliases: '-b', type: :numeric, desc: 'Number of buckets'
    option :schema, required: true, aliases: '-o', desc: 'Output JSON file that contains information for each bucket'
    # Parses the reports matched by --files, distributes the test files over
    # --buckets buckets and writes the result to --schema as pretty JSON.
    def split
      # Thor's numeric type may hand back a Float; the splitter needs a
      # positive whole number of buckets to distribute into.
      bucket_count = options[:buckets].to_i
      raise Thor::Error, "Number of buckets must be at least 1, got #{options[:buckets]}" if bucket_count < 1

      parsed_timings = JunitTimingSplitter::Parser.new(options[:files]).execute
      buckets = JunitTimingSplitter::Splitter.new(parsed_timings, bucket_count).execute
      FileUtils.mkdir_p(File.dirname(options[:schema]))
      write_schema(buckets)
      puts "Buckets written to #{options[:schema]}"
    end

    desc 'show', 'Show test files of a specific bucket from JSON file'
    option :schema, required: true, aliases: '-s', desc: 'Specific Generated JSON file from split step'
    option :bucket, required: true, aliases: '-i', type: :numeric, desc: 'Bucket number to read'
    # Prints the files of bucket --bucket separated by spaces, or reports
    # 'Bucket not found' and exits with status 1 when there are none.
    def show
      with_schema do |schema|
        bucket_index = options[:bucket].to_i
        # A negative index would otherwise count from the end of the bucket list.
        files = bucket_index.negative? ? [] : schema.files_for_bucket(bucket_index)

        if files.any?
          puts files.join(' ')
        else
          puts 'Bucket not found'
          exit(1)
        end
      end
    end

    desc 'scan', 'Scan folder or glob path for missing test files'
    option :schema, required: true, aliases: '-s', desc: 'Specific Generated JSON file from split step'
    option :files, required: true, aliases: '-f', desc: 'Glob path to scan for test files'
    # Lists files matched by --files that are absent from every bucket.
    def scan
      with_schema do |schema|
        missing_files = schema.scan_missing_files(options[:files])

        if missing_files.empty?
          puts 'No missing test files detected.'
        else
          puts 'Missing test files:'
          missing_files.each { |file| puts file }
        end
      end
    end

    desc 'merge', 'Merge missing test files into buckets'
    option :schema, required: true, aliases: '-s', desc: 'Specific Generated JSON file from split step'
    option :files, required: true, aliases: '-f', desc: 'Glob path to scan for test files'
    # Adds files matched by --files that are absent from the schema to the
    # existing buckets and rewrites the schema file in place.
    def merge
      with_schema do |schema|
        missing_files = schema.scan_missing_files(options[:files])

        if missing_files.empty?
          puts 'No missing test files to merge.'
        else
          raise Thor::Error, 'Schema contains no buckets to merge into' if schema.buckets.empty?

          buckets = splitter_seeded_from(schema).merge_missing_files(missing_files)
          write_schema(buckets)
          puts "Missing files merged into buckets and written to #{options[:schema]}"
        end
      end
    end

    # Ask Thor to terminate with a failure status when a command fails.
    def self.exit_on_failure?
      true
    end

    private

    # Loads the schema named by --schema and yields it. A missing or
    # unparsable schema file is reported on stdout and ends the process with
    # status 1 instead of surfacing a stack trace.
    def with_schema
      yield JunitTimingSplitter::Schema.new(options[:schema])
    rescue IOError => e
      puts e.message
      exit(1)
    rescue JSON::ParserError => e
      puts "Schema file is not valid JSON: #{e.message}"
      exit(1)
    end

    # Builds a splitter whose buckets start out with the files and timings
    # already stored in the schema, so merging appends to the existing
    # assignment rather than replacing it.
    def splitter_seeded_from(schema)
      splitter = JunitTimingSplitter::Splitter.new([], schema.buckets.size)
      schema.buckets.each_with_index do |stored, index|
        bucket = splitter.buckets[index]
        bucket.files.concat(Array(stored['files']))
        bucket.total_time += stored['total_time'].to_f
      end
      splitter
    end

    # Serialises the buckets to the --schema path as pretty-printed JSON.
    def write_schema(buckets)
      File.write(options[:schema], JSON.pretty_generate(buckets.map(&:to_h)))
    end
  end
end
@@ -0,0 +1,11 @@
1
module JunitTimingSplitter
  # Running totals for a single test file: its path, the summed runtime and
  # the number of testcases counted so far.
  class ParsedTiming
    attr_accessor :file, :total_time, :total_testcases

    # @param file [String] path of the test file
    # @param total_time [Float] runtime recorded for the file
    # @param total_testcases [Integer] testcases counted, 1 unless given
    def initialize(file:, total_time:, total_testcases: 1)
      @file, @total_time, @total_testcases = file, total_time, total_testcases
    end
  end
end
@@ -0,0 +1,39 @@
1
module JunitTimingSplitter
  # Reads JUnit XML reports and sums the testcase times per test file.
  class Parser
    attr_reader :file_paths, :files, :parsed_timings

    # Expands the glob and announces every report that was found on stdout.
    #
    # @param file_paths [String] glob pattern selecting the XML reports
    def initialize(file_paths)
      @file_paths = file_paths
      @files = Dir.glob(file_paths)
      @parsed_timings = []

      puts "Detected #{files.size} files"
      @files.each { |file| puts "Detected file: #{file}" }
    end

    # Parse multiple rspec-results.xml files into a list of files and their execution times.
    #
    # Every call starts from scratch, so running it repeatedly yields the
    # same totals instead of accumulating on top of a previous run.
    #
    # @return [Array<ParsedTiming>] one entry per test file, in first-seen order
    def execute
      # Keyed by test file so each testcase is merged with a single hash
      # lookup; hashes keep insertion order, which preserves first-seen order.
      timings_by_file = {}

      files.each do |report_path|
        File.open(report_path) do |report|
          Nokogiri::XML(report).xpath('//testcase').each do |testcase|
            # A missing time attribute counts as 0.0 seconds (nil.to_f).
            record(timings_by_file, testcase['file'], testcase['time'].to_f)
          end
        end
      end

      @parsed_timings.replace(timings_by_file.values)
    end

    private

    # Adds one testcase's runtime to its file's running total; testcases
    # without a file attribute are skipped.
    def record(timings_by_file, test_file, seconds)
      return if test_file.nil?

      existing = timings_by_file[test_file]
      if existing
        existing.total_time += seconds
        existing.total_testcases += 1
      else
        timings_by_file[test_file] = ParsedTiming.new(file: test_file, total_time: seconds)
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module JunitTimingSplitter
  # Read-only view over the JSON bucket schema written by the split step.
  class Schema
    attr_reader :path, :buckets

    # Loads and parses the schema file.
    #
    # @param path [String] location of the JSON schema file
    # @raise [IOError] when no file exists at +path+
    def initialize(path)
      @path = path
      validate_file
      @buckets = JSON.parse(File.read(path))
    end

    # Retrieve files from a specific bucket.
    #
    # @param bucket_number [Integer] zero-based bucket index
    # @return [Array<String>] the bucket's files; empty when the index is out
    #   of range or the bucket lists no files
    def files_for_bucket(bucket_number)
      # Array#[] treats negative indices as "from the end"; a negative bucket
      # number is simply not a valid bucket here.
      return [] if bucket_number.negative?

      bucket = buckets[bucket_number]
      bucket ? Array(bucket['files']) : []
    end

    # Retrieve all parsed files across all buckets.
    #
    # @return [Array<String>] every file listed in any bucket
    def all_parsed_files
      # Array() tolerates a bucket without a "files" entry.
      buckets.flat_map { |bucket| Array(bucket['files']) }
    end

    # Scan for missing files in a specified glob path.
    #
    # Both sides are compared as absolute paths.
    #
    # @param glob_path [String] glob pattern selecting candidate test files
    # @return [Array<String>] absolute paths matched by the glob that appear
    #   in no bucket
    def scan_missing_files(glob_path)
      known_files = all_parsed_files.map { |file| File.expand_path(file) }
      candidates = Dir.glob(glob_path).map { |file| File.expand_path(file) }
      candidates - known_files
    end

    private

    # Fails fast with an IOError when the schema file is absent.
    def validate_file
      raise IOError, "Schema file not found: #{path}" unless File.exist?(path)
    end
  end
end
@@ -0,0 +1,59 @@
1
module JunitTimingSplitter
  # Distributes test files over a fixed number of buckets so that the summed
  # runtimes of the buckets stay close to each other.
  class Splitter
    attr_reader :parsed_timings, :total_splits, :buckets

    # @param parsed_timings [Array<#file, #total_time>] per-file timings
    # @param total_splits [Integer] number of buckets to distribute into
    def initialize(parsed_timings, total_splits)
      @parsed_timings = parsed_timings
      @total_splits = total_splits
      @buckets = Array.new(total_splits) { Bucket.new }
    end

    # Split the parsed timings into buckets based on total_splits.
    #
    # Starts from fresh, empty buckets on every call, then places files from
    # slowest to fastest, each into the bucket that is currently lightest.
    #
    # @return [Array<Bucket>] the filled buckets
    # @raise [ArgumentError] when there are timings but no bucket to hold them
    def execute
      # Sort by time descending
      sorted_timings = parsed_timings.sort_by { |parsed_timing| -parsed_timing.total_time }

      @buckets = Array.new(total_splits) { Bucket.new }
      sorted_timings.each { |timing| assign(timing.file, timing.total_time) }

      @buckets
    end

    # Adds files without recorded timings to the current buckets.
    #
    # Each file is charged +default_time+ seconds and goes to the lightest
    # bucket (the first one on ties). Starting from empty buckets this deals
    # the files out in turn; with pre-filled buckets it evens out the load.
    #
    # @param missing_files [Array<String>] files to add
    # @param default_time [Numeric] assumed runtime of each added file
    # @return [Array<Bucket>] the updated buckets
    # @raise [ArgumentError] when there are files but no bucket to hold them
    def merge_missing_files(missing_files, default_time: 1.0)
      missing_files.each { |file| assign(file, default_time) }
      @buckets
    end

    # Command to display a specific split, start with index 0.
    #
    # Prints every bucket when +split_number+ is omitted, otherwise only the
    # requested one. Note that this replaces Object#inspect: it writes to
    # stdout and returns nil.
    #
    # @param split_number [Integer, nil] zero-based bucket index
    # @return [nil]
    def inspect(split_number: nil)
      unless split_number
        buckets.each_with_index { |bucket, index| puts describe(bucket, index) }
        return
      end

      # Negative numbers would index from the end, so reject them as well.
      if split_number.negative? || split_number > total_splits - 1
        puts "Invalid split number. Total splits: #{total_splits}"
        return
      end

      puts describe(buckets[split_number], split_number)
    end

    private

    # Puts one file into the bucket with the smallest total so far.
    def assign(file, seconds)
      lightest = @buckets.min_by(&:total_time)
      raise ArgumentError, "Cannot distribute files across #{total_splits} buckets" if lightest.nil?

      lightest.files << file
      lightest.total_time += seconds
    end

    # One-line summary of a bucket: index, rounded total time and its files.
    def describe(bucket, index)
      "[BUCKET #{index} - #{bucket.total_time.round(2)}s] #{bucket.files.join(', ')}"
    end
  end
end
@@ -0,0 +1,13 @@
1
require 'fileutils'
require 'json'

require 'nokogiri'
require 'thor'
require "zeitwerk"
5
+
6
# Gem entry point: configures Zeitwerk autoloading for the gem's classes.
#
# Example usage, analysing all matching XML files and splitting into 5 buckets:
#   parsed_timings = JunitTimingSplitter::Parser.new('results_*.xml').execute
#   buckets = JunitTimingSplitter::Splitter.new(parsed_timings, 5).execute
module JunitTimingSplitter
  Zeitwerk::Loader.for_gem.setup
end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: junit_timing_splitter
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Kim Yu Ng
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-11-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.15'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2.0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '1.15'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.0'
33
+ - !ruby/object:Gem::Dependency
34
+ name: thor
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: json
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.1'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 2.1.0
57
+ type: :runtime
58
+ prerelease: false
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: '2.1'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.1.0
67
+ - !ruby/object:Gem::Dependency
68
+ name: zeitwerk
69
+ requirement: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '2.4'
74
+ - - "<"
75
+ - !ruby/object:Gem::Version
76
+ version: '3.0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '2.4'
84
+ - - "<"
85
+ - !ruby/object:Gem::Version
86
+ version: '3.0'
87
+ description: A tool to optimize parallel test execution by analyzing JUnit XML results
88
+ and distributing test files across buckets.
89
+ email: kimyu92@gmail.com
90
+ executables:
91
+ - junit_timing_splitter
92
+ extensions: []
93
+ extra_rdoc_files: []
94
+ files:
95
+ - bin/junit_timing_splitter
96
+ - lib/junit_timing_splitter.rb
97
+ - lib/junit_timing_splitter/bucket.rb
98
+ - lib/junit_timing_splitter/cli.rb
99
+ - lib/junit_timing_splitter/parsed_timing.rb
100
+ - lib/junit_timing_splitter/parser.rb
101
+ - lib/junit_timing_splitter/schema.rb
102
+ - lib/junit_timing_splitter/splitter.rb
103
+ homepage: https://github.com/kimyu92/junit_timing_splitter
104
+ licenses:
105
+ - MIT
106
+ metadata: {}
107
+ post_install_message:
108
+ rdoc_options: []
109
+ require_paths:
110
+ - lib
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '3.1'
116
+ - - "<"
117
+ - !ruby/object:Gem::Version
118
+ version: '4.0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ requirements: []
125
+ rubygems_version: 3.5.22
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Split test files into evenly distributed buckets based on execution time
129
+ test_files: []