smart-tests-cli 2.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. main/java/com/launchableinc/ingest/commits/Authenticator.java +14 -0
  2. main/java/com/launchableinc/ingest/commits/BUILD +28 -0
  3. main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java +479 -0
  4. main/java/com/launchableinc/ingest/commits/CommitIngester.java +188 -0
  5. main/java/com/launchableinc/ingest/commits/CountingDiffFormatter.java +53 -0
  6. main/java/com/launchableinc/ingest/commits/GitHubActionsAuthenticator.java +29 -0
  7. main/java/com/launchableinc/ingest/commits/GitHubIdTokenAuthenticator.java +59 -0
  8. main/java/com/launchableinc/ingest/commits/JSCommit.java +121 -0
  9. main/java/com/launchableinc/ingest/commits/JSFileChange.java +87 -0
  10. main/java/com/launchableinc/ingest/commits/ObjectRevFilter.java +27 -0
  11. main/java/com/launchableinc/ingest/commits/SSLBypass.java +49 -0
  12. main/java/com/launchableinc/ingest/commits/TokenAuthenticator.java +18 -0
  13. smart_tests_cli-2.0.0rc1.dist-info/METADATA +168 -0
  14. smart_tests_cli-2.0.0rc1.dist-info/RECORD +24 -0
  15. smart_tests_cli-2.0.0rc1.dist-info/WHEEL +5 -0
  16. smart_tests_cli-2.0.0rc1.dist-info/entry_points.txt +2 -0
  17. smart_tests_cli-2.0.0rc1.dist-info/licenses/LICENSE.txt +202 -0
  18. smart_tests_cli-2.0.0rc1.dist-info/top_level.txt +2 -0
  19. test/java/com/launchableinc/ingest/commits/AllTests.java +13 -0
  20. test/java/com/launchableinc/ingest/commits/BUILD +21 -0
  21. test/java/com/launchableinc/ingest/commits/CommitGraphCollectorTest.java +172 -0
  22. test/java/com/launchableinc/ingest/commits/CommitIngesterTest.java +76 -0
  23. test/java/com/launchableinc/ingest/commits/SSLBypassTest.java +13 -0
  24. test/java/com/launchableinc/ingest/commits/java8-compat.sh +23 -0
@@ -0,0 +1,14 @@
1
+ package com.launchableinc.ingest.commits;
2
+
3
+ import java.util.Collection;
4
+ import org.apache.http.Header;
5
+
6
+ /** Supplies the HTTP headers that authenticate requests sent by CommitGraphCollector to the Launchable API. */
7
+ public interface Authenticator {
8
+ /**
9
+ * Returns the HTTP headers that must accompany a request for it to be authenticated.
10
+ *
11
+ * @return the authentication headers; CommitGraphCollector installs them as the default headers of its HTTP client
12
+ */
13
+ Collection<Header> getAuthenticationHeaders();
14
+ }
@@ -0,0 +1,28 @@
1
+ package(
2
+ default_visibility = ["//visibility:public"]  # every target in this package is visible to all other packages
3
+ )
4
+
5
+ java_library(  # library built from the Java sources in this directory
6
+ name = "commits",
7
+ srcs = glob(["*.java"]),  # picks up every .java file directly in this directory
8
+ deps = [
9
+ "@maven//:args4j_args4j",
10
+ "@maven//:com_fasterxml_jackson_core_jackson_annotations",
11
+ "@maven//:com_fasterxml_jackson_core_jackson_core",
12
+ "@maven//:com_fasterxml_jackson_core_jackson_databind",
13
+ "@maven//:com_google_guava_guava",
14
+ "@maven//:org_apache_httpcomponents_httpclient",
15
+ "@maven//:org_apache_httpcomponents_httpcore",
16
+ "@maven//:org_eclipse_jgit_org_eclipse_jgit",
17
+ "@maven//:org_slf4j_slf4j_api",
18
+ ],
19
+ )
20
+
21
+ java_binary(  # runnable wrapper around the ":commits" library
22
+ name = "exe",
23
+ main_class = "com.launchableinc.ingest.commits.CommitIngester",  # class whose main() is the entry point
24
+ runtime_deps = [
25
+ ":commits",
26
+ "@maven//:org_slf4j_slf4j_jdk14",  # SLF4J binding, needed only at run time
27
+ ],
28
+ )
@@ -0,0 +1,479 @@
1
+ package com.launchableinc.ingest.commits;
2
+
3
+ import com.fasterxml.jackson.core.JsonFactory;
4
+ import com.fasterxml.jackson.core.JsonGenerator;
5
+ import com.fasterxml.jackson.core.JsonParser;
6
+ import com.fasterxml.jackson.core.JsonProcessingException;
7
+ import com.fasterxml.jackson.databind.ObjectMapper;
8
+ import com.fasterxml.jackson.databind.node.ObjectNode;
9
+ import com.google.common.collect.ImmutableList;
10
+ import com.google.common.io.CharStreams;
11
+ import org.apache.http.Header;
12
+ import org.apache.http.HttpResponse;
13
+ import org.apache.http.client.config.RequestConfig;
14
+ import org.apache.http.client.methods.CloseableHttpResponse;
15
+ import org.apache.http.client.methods.HttpGet;
16
+ import org.apache.http.client.methods.HttpPost;
17
+ import org.apache.http.entity.EntityTemplate;
18
+ import org.apache.http.impl.client.CloseableHttpClient;
19
+ import org.apache.http.impl.client.HttpClientBuilder;
20
+ import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm;
21
+ import org.eclipse.jgit.diff.DiffEntry;
22
+ import org.eclipse.jgit.errors.InvalidObjectIdException;
23
+ import org.eclipse.jgit.errors.MissingObjectException;
24
+ import org.eclipse.jgit.lib.ConfigConstants;
25
+ import org.eclipse.jgit.lib.ObjectId;
26
+ import org.eclipse.jgit.lib.ObjectReader;
27
+ import org.eclipse.jgit.lib.PersonIdent;
28
+ import org.eclipse.jgit.lib.Repository;
29
+ import org.eclipse.jgit.revwalk.RevCommit;
30
+ import org.eclipse.jgit.revwalk.RevSort;
31
+ import org.eclipse.jgit.revwalk.RevWalk;
32
+ import org.eclipse.jgit.revwalk.filter.CommitTimeRevFilter;
33
+ import org.eclipse.jgit.revwalk.filter.OrRevFilter;
34
+ import org.eclipse.jgit.submodule.SubmoduleWalk;
35
+ import org.slf4j.Logger;
36
+ import org.slf4j.LoggerFactory;
37
+
38
+ import java.io.ByteArrayInputStream;
39
+ import java.io.ByteArrayOutputStream;
40
+ import java.io.Closeable;
41
+ import java.io.IOException;
42
+ import java.io.InputStreamReader;
43
+ import java.io.OutputStream;
44
+ import java.io.UncheckedIOException;
45
+ import java.net.URL;
46
+ import java.nio.charset.StandardCharsets;
47
+ import java.util.ArrayList;
48
+ import java.util.Collection;
49
+ import java.util.List;
50
+ import java.util.Objects;
51
+ import java.util.Set;
52
+ import java.util.concurrent.TimeUnit;
53
+ import java.util.function.Consumer;
54
+ import java.util.function.Supplier;
55
+ import java.util.zip.GZIPInputStream;
56
+ import java.util.zip.GZIPOutputStream;
57
+
58
+ import static com.google.common.collect.ImmutableList.*;
59
+ import static java.util.Arrays.*;
60
+
61
+ /**
62
+ * Compares what commits the local repository and the remote repository have, then send delta over.
63
+ */
64
+ public class CommitGraphCollector {
65
+ private static final Logger logger = LoggerFactory.getLogger(CommitGraphCollector.class);
66
+ private static final ObjectMapper objectMapper = new ObjectMapper();
67
+ private static final int HTTP_TIMEOUT_MILLISECONDS = 15_000;
68
+
69
+ /**
70
+ * Root repository to start processing.
71
+ *
72
+ * <p>Sub modules form a tree structure rooted at this repository.
73
+ */
74
+ private final Repository root;
75
+
76
+ private int commitsSent;
77
+
78
+ private boolean collectCommitMessage;
79
+
80
+ private int maxDays;
81
+
82
+ private boolean audit;
83
+
84
+ private boolean dryRun;
85
+
86
+ private boolean warnMissingObject;
87
+
88
+ private String dryRunPrefix() {
89
+ if (!dryRun) {
90
+ return "";
91
+ }
92
+ return "(DRY RUN) ";
93
+ }
94
+
95
+ private boolean outputAuditLog() {
96
+ return audit || dryRun;
97
+ }
98
+
99
+ public CommitGraphCollector(Repository git) {
100
+ this.root = git;
101
+ }
102
+
103
+ /** How many commits did we transfer? */
104
+ public int getCommitsSent() {
105
+ return commitsSent;
106
+ }
107
+
108
+ private String dumpHeaderAsJson(Header[] headers) throws JsonProcessingException {
109
+ ObjectNode header = objectMapper.createObjectNode();
110
+ for (Header h : headers) {
111
+ header.put(h.getName(), h.getValue());
112
+ }
113
+ return objectMapper.writeValueAsString(header);
114
+ }
115
+
116
+ /** Transfers the commits to the remote endpoint. */
117
+ public void transfer(URL service, Authenticator authenticator, boolean enableTimeout) throws IOException {
118
+ URL url;
119
+ HttpClientBuilder builder =
120
+ HttpClientBuilder.create()
121
+ .useSystemProperties()
122
+ .setDefaultHeaders(authenticator.getAuthenticationHeaders());
123
+ if (enableTimeout) {
124
+ RequestConfig config = RequestConfig.custom()
125
+ .setConnectTimeout(HTTP_TIMEOUT_MILLISECONDS)
126
+ .setConnectionRequestTimeout(HTTP_TIMEOUT_MILLISECONDS)
127
+ .setSocketTimeout(HTTP_TIMEOUT_MILLISECONDS).build();
128
+ builder.setDefaultRequestConfig(config);
129
+ }
130
+ try (CloseableHttpClient client = builder.build()) {
131
+ url = new URL(service, "latest");
132
+ if (outputAuditLog()) {
133
+ System.err.printf(
134
+ "AUDIT:launchable:%ssend request method:get path: %s%n", dryRunPrefix(), url);
135
+ }
136
+ CloseableHttpResponse latestResponse = client.execute(new HttpGet(url.toExternalForm()));
137
+ ImmutableList<ObjectId> advertised = getAdvertisedRefs(handleError(url, latestResponse));
138
+ honorMaxDaysHeader(latestResponse);
139
+
140
+ // every time a new stream is needed, supply ByteArrayOutputStream, and when the data is all
141
+ // written, turn around and ship that over
142
+ transfer(
143
+ advertised,
144
+ () -> {
145
+ try {
146
+ return new GZIPOutputStream(
147
+ new ByteArrayOutputStream() {
148
+ @Override
149
+ public void close() throws IOException {
150
+ URL url = new URL(service, "collect");
151
+ HttpPost request = new HttpPost(url.toExternalForm());
152
+ request.setHeader("Content-Type", "application/json");
153
+ request.setHeader("Content-Encoding", "gzip");
154
+ request.setEntity(new EntityTemplate(this::writeTo));
155
+
156
+ if (outputAuditLog()) {
157
+ InputStreamReader gzip =
158
+ new InputStreamReader(
159
+ new GZIPInputStream(new ByteArrayInputStream(toByteArray())),
160
+ StandardCharsets.UTF_8);
161
+ String json = CharStreams.toString(gzip);
162
+ System.err.printf(
163
+ "AUDIT:launchable:%ssend request method:post path:%s headers:%s"
164
+ + " args:%s%n",
165
+ dryRunPrefix(), url, dumpHeaderAsJson(request.getAllHeaders()), json);
166
+ }
167
+ if (dryRun) {
168
+ return;
169
+ }
170
+ handleError(url, client.execute(request));
171
+ }
172
+ });
173
+ } catch (IOException e) {
174
+ throw new UncheckedIOException(e);
175
+ }
176
+ },
177
+ 256);
178
+ }
179
+ }
180
+
181
+ /**
182
+ * When a user incorrectly configures shallow clone, the incremental nature of commit collection
183
+ * makes it really hard for us and users to collaboratively reset and repopulate the commit data.
184
+ * This server-side override mechanism makes it easier.
185
+ */
186
+ private void honorMaxDaysHeader(HttpResponse response) {
187
+ Header h = response.getFirstHeader("X-Max-Days");
188
+ if (h!=null) {
189
+ maxDays = Integer.parseInt(h.getValue());
190
+ }
191
+ }
192
+
193
+ private ImmutableList<ObjectId> getAdvertisedRefs(HttpResponse response) throws IOException {
194
+ JsonParser parser = new JsonFactory().createParser(response.getEntity().getContent());
195
+ String[] ids = objectMapper.readValue(parser, String[].class);
196
+ return stream(ids)
197
+ .map(
198
+ s -> {
199
+ try {
200
+ return ObjectId.fromString(s);
201
+ } catch (InvalidObjectIdException e) {
202
+ // if the server sends us a bogus data, don't penalize users, silently drop that
203
+ return null;
204
+ }
205
+ })
206
+ .filter(Objects::nonNull)
207
+ .collect(toImmutableList());
208
+ }
209
+
210
+ /**
211
+ * Writes delta between local commits to the advertised to JSON stream.
212
+ *
213
+ * @param streams Commits are written to streams provided by this {@link Supplier}, in the given
214
+ * chunk size.
215
+ */
216
+ public void transfer(
217
+ Collection<ObjectId> advertised, Supplier<OutputStream> streams, int chunkSize)
218
+ throws IOException {
219
+ try (ChunkStreamer cs = new ChunkStreamer(streams, chunkSize)) {
220
+ new ByRepository(root).transfer(advertised, cs);
221
+ }
222
+ }
223
+
224
+ /**
225
+ * {@link Consumer} that groups commits into chunks and write them as JSON, using streams supplied
226
+ * by the factory.
227
+ */
228
+ private static final class ChunkStreamer implements Consumer<JSCommit>, Closeable {
229
+
230
+ private final Supplier<OutputStream> streams;
231
+ private JsonGenerator w;
232
+ /** Count # of items we wrote to this stream. */
233
+ private int count;
234
+
235
+ private final int chunkSize;
236
+
237
+ ChunkStreamer(Supplier<OutputStream> streams, int chunkSize) {
238
+ this.streams = streams;
239
+ this.chunkSize = chunkSize;
240
+ }
241
+
242
+ @Override
243
+ public void accept(JSCommit commit) {
244
+ try {
245
+ if (w == null) {
246
+ open();
247
+ }
248
+ w.writeObject(commit);
249
+ if (++count >= chunkSize) {
250
+ close();
251
+ }
252
+ } catch (IOException e) {
253
+ throw new UncheckedIOException(e);
254
+ }
255
+ }
256
+
257
+ public void open() throws IOException {
258
+ w = new JsonFactory().createGenerator(streams.get()).useDefaultPrettyPrinter();
259
+ w.setCodec(objectMapper);
260
+ w.writeStartObject();
261
+ w.writeArrayFieldStart("commits");
262
+ }
263
+
264
+ @Override
265
+ public void close() throws IOException {
266
+ if (w == null) {
267
+ return; // already closed
268
+ }
269
+ w.writeEndArray();
270
+ w.writeEndObject();
271
+ w.close();
272
+ w = null;
273
+ count = 0;
274
+ }
275
+ }
276
+
277
+ /** Pass through {@link CloseableHttpResponse} but checks and throws an error. */
278
+ private CloseableHttpResponse handleError(URL url, CloseableHttpResponse response)
279
+ throws IOException {
280
+ int code = response.getStatusLine().getStatusCode();
281
+ if (code >= 400) {
282
+ throw new IOException(
283
+ String.format(
284
+ "Failed to retrieve from %s: %s%n%s",
285
+ url,
286
+ response.getStatusLine(),
287
+ CharStreams.toString(
288
+ new InputStreamReader(
289
+ response.getEntity().getContent(), StandardCharsets.UTF_8))));
290
+ }
291
+ return response;
292
+ }
293
+
294
+ public void collectCommitMessage(boolean commitMessage) {
295
+ this.collectCommitMessage = commitMessage;
296
+ }
297
+
298
+ public void setMaxDays(int days) {
299
+ this.maxDays = days;
300
+ }
301
+
302
+ public void setAudit(boolean audit) {
303
+ this.audit = audit;
304
+ }
305
+
306
+ public void setDryRun(boolean dryRun) {
307
+ this.dryRun = dryRun;
308
+ }
309
+
310
+ /** Process commits per repository. */
311
+ final class ByRepository implements AutoCloseable {
312
+
313
+ private final Repository git;
314
+
315
+ private final ObjectReader objectReader;
316
+ private final Set<ObjectId> shallowCommits;
317
+
318
+ ByRepository(Repository git) throws IOException {
319
+ this.git = git;
320
+ this.objectReader = git.newObjectReader();
321
+ this.shallowCommits = objectReader.getShallowCommits();
322
+ }
323
+
324
+ /**
325
+ * Writes delta between local commits to the advertised to JSON stream.
326
+ *
327
+ * @param receiver Receives commits that should be sent, one by one.
328
+ */
329
+ public void transfer(Collection<ObjectId> advertised, Consumer<JSCommit> receiver)
330
+ throws IOException {
331
+ try (RevWalk walk = new RevWalk(git)) {
332
+ // walk reverse topological order, so that older commits get added to the server earlier.
333
+ // This way, the connectivity of the commit graph will be always maintained
334
+ walk.sort(RevSort.TOPO);
335
+ walk.sort(RevSort.REVERSE, true);
336
+ // also combine this with commit time based ordering, so that we can stop walking when we
337
+ // find old enough commits AFAICT, this is no-op in JGit and it always sorts things in
338
+ // commit time order, but it is in the contract, so I'm assuming we shouldn't rely on the
339
+ // implementation optimization that's currently enabling this all the time
340
+ walk.sort(RevSort.COMMIT_TIME_DESC, true);
341
+
342
+ ObjectId headId = git.resolve("HEAD");
343
+ walk.markStart(walk.parseCommit(headId));
344
+
345
+ // don't walk commits too far back.
346
+ // for our purpose of computing CUT, these are unlikely to contribute meaningfully
347
+ // and it drastically cuts down the initial commit consumption of a new large repository.
348
+ // ... except we do want to capture the head commit, as that makes it easier to spot integration problems
349
+ // when `record build` and `record commit` are separated.
350
+ walk.setRevFilter(
351
+ OrRevFilter.create(
352
+ CommitTimeRevFilter.after(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(maxDays)),
353
+ new ObjectRevFilter(headId)));
354
+
355
+
356
+ for (ObjectId id : advertised) {
357
+ try {
358
+ RevCommit c = walk.parseCommit(id);
359
+ walk.markUninteresting(c);
360
+ } catch (MissingObjectException e) {
361
+ // it's possible that the server advertises a commit we don't have.
362
+ //
363
+ // TODO: how does git-push handles the case when the client doesn't recognize commits?
364
+ // Unless it tries to negotiate further what commits they have in common,
365
+ // git-upload-pack can end up creating a big pack with lots of redundant objects
366
+ //
367
+ // think about a case when a client is pushing a new branch against
368
+ // the master branch that moved on the server.
369
+ }
370
+ }
371
+
372
+ // walk the commits, transform them, and send them to the receiver
373
+ for (RevCommit c : walk) {
374
+ JSCommit d = transform(c);
375
+ receiver.accept(d);
376
+ commitsSent++;
377
+ }
378
+ }
379
+
380
+ /*
381
+ Git submodule support
382
+ =====================
383
+
384
+ In a fully general version of the problem, every commit we are walking might point to
385
+ different sub-module at different commit, so we should be walking over all of those.
386
+ That will require us to resolve sub-modules, since there's no guarantee that those submodules
387
+ are cloned and available.
388
+
389
+ Here, we solve a weaker version of this, that works well enough for `launchable build record`
390
+ and obtain commits needed to determine the subject.
391
+
392
+ That is, find submodules that are available in the working tree (thus `!isBare()`), and
393
+ collect all the commits from those repositories.
394
+ */
395
+ if (!git.isBare()) {
396
+ try (SubmoduleWalk swalk = SubmoduleWalk.forIndex(git)) {
397
+ while (swalk.next()) {
398
+ try (Repository subRepo = swalk.getRepository()) {
399
+ if (subRepo != null) {
400
+ try (ByRepository br = new ByRepository(subRepo)) {
401
+ br.transfer(advertised, receiver);
402
+ }
403
+ }
404
+ }
405
+ }
406
+ }
407
+ }
408
+ }
409
+
410
+ private JSCommit transform(RevCommit r) throws IOException {
411
+ JSCommit c = new JSCommit();
412
+ c.setCommitHash(r.name());
413
+ c.setMessage(collectCommitMessage ? r.getFullMessage() : "");
414
+
415
+ PersonIdent author = r.getAuthorIdent();
416
+ c.setAuthorEmailAddress(JSCommit.hashEmail(author.getEmailAddress()));
417
+ c.setAuthorWhen(author.getWhen().getTime());
418
+ c.setAuthorTimezoneOffset(author.getTimeZoneOffset());
419
+
420
+ PersonIdent committer = r.getCommitterIdent();
421
+ c.setCommitterEmailAddress(JSCommit.hashEmail(committer.getEmailAddress()));
422
+ c.setCommitterWhen(committer.getWhen().getTime());
423
+ c.setCommitterTimezoneOffset(committer.getTimeZoneOffset());
424
+
425
+ // Change the on-memory config for the diff algorithm.
426
+ // CGit supports patience diff while JGit doesn't. Since the FileBasedRepository reads the
427
+ // user's .gitconfig, if a user sets this
428
+ // algorithm, JGit causes a failure. Changing this on-memory avoids this.
429
+ git.getConfig()
430
+ .setEnum(
431
+ ConfigConstants.CONFIG_DIFF_SECTION,
432
+ null,
433
+ ConfigConstants.CONFIG_KEY_ALGORITHM,
434
+ SupportedAlgorithm.HISTOGRAM);
435
+
436
+
437
+ if (shallowCommits.contains(r)) {
438
+ c.setShallow(true);
439
+ warnMissingObject();
440
+ }
441
+
442
+ for (RevCommit p : r.getParents()) {
443
+ CountingDiffFormatter diff = new CountingDiffFormatter(git);
444
+ List<DiffEntry> files = diff.scan(p.getTree(), r.getTree());
445
+ List<JSFileChange> changes = new ArrayList<>();
446
+ for (DiffEntry de : files) {
447
+ try {
448
+ changes.add(diff.process(de));
449
+ } catch (MissingObjectException e) {
450
+ // in a partially cloned repository, BLOBs might be unavailable and that'd result in MissingObjectException
451
+ System.err.printf("Warning: %s is missing. Skipping diff calculation for %s -> %s%n",
452
+ e.getObjectId().abbreviate(7).name(),
453
+ p.abbreviate(7).name(),
454
+ r.abbreviate(7).name()
455
+ );
456
+ warnMissingObject();
457
+ } catch (IOException e) {
458
+ logger.warn("Failed to process a change to a file", e);
459
+ }
460
+ }
461
+ c.getParentHashes().put(p.name(), changes);
462
+ }
463
+
464
+ return c;
465
+ }
466
+
467
+ private void warnMissingObject() {
468
+ if (!warnMissingObject) {
469
+ warnMissingObject = true;
470
+ System.err.println("See https://www.launchableinc.com/missing-git-object-during-commit-collection");
471
+ }
472
+ }
473
+
474
+ @Override
475
+ public void close() {
476
+ objectReader.close();
477
+ }
478
+ }
479
+ }