getdocs 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {getdocs-0.1.0/src/getdocs.egg-info → getdocs-0.2.0}/PKG-INFO +49 -1
- {getdocs-0.1.0 → getdocs-0.2.0}/README.md +48 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/pyproject.toml +1 -1
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/cli.py +19 -4
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/config.py +1 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/engine.py +11 -4
- getdocs-0.2.0/src/getdocs/outcome.py +75 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/source.py +14 -7
- {getdocs-0.1.0 → getdocs-0.2.0/src/getdocs.egg-info}/PKG-INFO +49 -1
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs.egg-info/SOURCES.txt +3 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_source.py +7 -2
- getdocs-0.2.0/tests/test_summary_clone_e2e.py +88 -0
- getdocs-0.2.0/tests/test_summary_e2e.py +138 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/LICENSE +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/setup.cfg +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/__init__.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/__main__.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/api.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/extract.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/identity.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/jobs.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/navharvest.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/output.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/scope.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/sitemap.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs/urlnorm.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs.egg-info/dependency_links.txt +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs.egg-info/entry_points.txt +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs.egg-info/requires.txt +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/src/getdocs.egg-info/top_level.txt +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_api.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_cli.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_crawl_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_extract.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_extract_pipeline.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_identity.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_identity_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_jobs.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_jsonl_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_jsonl_output.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_limits_errors_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_media_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_navharvest.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_navorder_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_output.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_politeness_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_relink_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_render_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_resume_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_scope.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_shell_detection.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_sitemap.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_sitemap_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_traversal_e2e.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_urlnorm.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_webhook_api.py +0 -0
- {getdocs-0.1.0 → getdocs-0.2.0}/tests/test_ws_api.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: getdocs
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Documentation crawler: recursively crawl a docs site and emit clean markdown
|
|
5
5
|
Author-email: jonbakerfish <jonbakerfish@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -113,6 +113,54 @@ Sitemap discovery, JavaScript rendering, source-repo cloning, polite
|
|
|
113
113
|
throttling, JSONL output, and resumable crawls are all built in — see
|
|
114
114
|
[docs/USAGE.md](docs/USAGE.md).
|
|
115
115
|
|
|
116
|
+
## Use with your agent
|
|
117
|
+
|
|
118
|
+
getdocs is built to be driven by a coding agent: it's an ordinary CLI whose
|
|
119
|
+
`out/` tree + `crawl.json` Manifest *is* the return value (no MCP server, no job
|
|
120
|
+
protocol — see [ADR-0007](docs/adr/0007-agent-integration-is-the-cli-not-an-mcp-surface.md)).
|
|
121
|
+
Two patterns cover most uses.
|
|
122
|
+
|
|
123
|
+
**Synchronous — fetch one docs section.** Scope defaults to the seed's host +
|
|
124
|
+
path prefix, so pointing at a subtree fetches just that subtree. This blocks
|
|
125
|
+
until done and works under any agent:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
getdocs crawl https://example.com/docs/auth -o ./out --summary-json
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
**Background — mirror a whole site.** Kick the crawl off as a background task
|
|
132
|
+
and keep working. Under **Claude Code** the agent is resumed automatically when
|
|
133
|
+
the crawl finishes; every other agent surveyed blocks or polls the output path
|
|
134
|
+
instead (this is a harness feature, not a getdocs one):
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
getdocs crawl https://example.com/docs -o ./out --summary-json &
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Read the summary, branch on the Outcome.** Every run ends with a one-line
|
|
141
|
+
summary on stderr; `--summary-json` adds a machine-readable object discriminated
|
|
142
|
+
by `outcome`. A run produces exactly one Outcome — a Crawl or a Clone:
|
|
143
|
+
|
|
144
|
+
```jsonc
|
|
145
|
+
// outcome: "crawled" — getdocs scraped the rendered site
|
|
146
|
+
{ "outcome": "crawled", "status": "ok", "pages": 42,
|
|
147
|
+
"output_dir": "./out", "manifest": "./out/crawl.json", "truncated": false }
|
|
148
|
+
|
|
149
|
+
// outcome: "cloned" — the docs were open-source, so getdocs cloned the repo
|
|
150
|
+
// (no pages / no manifest: a Clone is not a Crawl)
|
|
151
|
+
{ "outcome": "cloned", "status": "ok", "repo": "acme/docs",
|
|
152
|
+
"output_dir": "./out/docs", "mkdocs_config": "./out/mkdocs.yml" }
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Have the agent switch on `outcome`:
|
|
156
|
+
|
|
157
|
+
- **`crawled`** → grep and read the Pages under `output_dir` and follow the nav
|
|
158
|
+
/ reading order in `manifest` (`crawl.json`).
|
|
159
|
+
- **`cloned`** → you have the original markdown source; serve it with
|
|
160
|
+
`mkdocs serve -f <mkdocs_config>` (or just read the files under `output_dir`).
|
|
161
|
+
- **`status: "truncated"`** → the crawl hit its page cap; re-run with a higher
|
|
162
|
+
`--limit` (or `0` for unlimited) to get the rest.
|
|
163
|
+
|
|
116
164
|
## Install
|
|
117
165
|
|
|
118
166
|
Requires **Python 3.12+**.
|
|
@@ -73,6 +73,54 @@ Sitemap discovery, JavaScript rendering, source-repo cloning, polite
|
|
|
73
73
|
throttling, JSONL output, and resumable crawls are all built in — see
|
|
74
74
|
[docs/USAGE.md](docs/USAGE.md).
|
|
75
75
|
|
|
76
|
+
## Use with your agent
|
|
77
|
+
|
|
78
|
+
getdocs is built to be driven by a coding agent: it's an ordinary CLI whose
|
|
79
|
+
`out/` tree + `crawl.json` Manifest *is* the return value (no MCP server, no job
|
|
80
|
+
protocol — see [ADR-0007](docs/adr/0007-agent-integration-is-the-cli-not-an-mcp-surface.md)).
|
|
81
|
+
Two patterns cover most uses.
|
|
82
|
+
|
|
83
|
+
**Synchronous — fetch one docs section.** Scope defaults to the seed's host +
|
|
84
|
+
path prefix, so pointing at a subtree fetches just that subtree. This blocks
|
|
85
|
+
until done and works under any agent:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
getdocs crawl https://example.com/docs/auth -o ./out --summary-json
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Background — mirror a whole site.** Kick the crawl off as a background task
|
|
92
|
+
and keep working. Under **Claude Code** the agent is resumed automatically when
|
|
93
|
+
the crawl finishes; every other agent surveyed blocks or polls the output path
|
|
94
|
+
instead (this is a harness feature, not a getdocs one):
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
getdocs crawl https://example.com/docs -o ./out --summary-json &
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Read the summary, branch on the Outcome.** Every run ends with a one-line
|
|
101
|
+
summary on stderr; `--summary-json` adds a machine-readable object discriminated
|
|
102
|
+
by `outcome`. A run produces exactly one Outcome — a Crawl or a Clone:
|
|
103
|
+
|
|
104
|
+
```jsonc
|
|
105
|
+
// outcome: "crawled" — getdocs scraped the rendered site
|
|
106
|
+
{ "outcome": "crawled", "status": "ok", "pages": 42,
|
|
107
|
+
"output_dir": "./out", "manifest": "./out/crawl.json", "truncated": false }
|
|
108
|
+
|
|
109
|
+
// outcome: "cloned" — the docs were open-source, so getdocs cloned the repo
|
|
110
|
+
// (no pages / no manifest: a Clone is not a Crawl)
|
|
111
|
+
{ "outcome": "cloned", "status": "ok", "repo": "acme/docs",
|
|
112
|
+
"output_dir": "./out/docs", "mkdocs_config": "./out/mkdocs.yml" }
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Have the agent switch on `outcome`:
|
|
116
|
+
|
|
117
|
+
- **`crawled`** → grep and read the Pages under `output_dir` and follow the nav
|
|
118
|
+
/ reading order in `manifest` (`crawl.json`).
|
|
119
|
+
- **`cloned`** → you have the original markdown source; serve it with
|
|
120
|
+
`mkdocs serve -f <mkdocs_config>` (or just read the files under `output_dir`).
|
|
121
|
+
- **`status: "truncated"`** → the crawl hit its page cap; re-run with a higher
|
|
122
|
+
`--limit` (or `0` for unlimited) to get the rest.
|
|
123
|
+
|
|
76
124
|
## Install
|
|
77
125
|
|
|
78
126
|
Requires **Python 3.12+**.
|
|
@@ -67,6 +67,11 @@ def parse_args(argv: list[str] | None = None) -> CrawlConfig | ServeConfig:
|
|
|
67
67
|
"--format", choices=["files", "jsonl"], default="files",
|
|
68
68
|
help="files: .md tree + crawl.json; jsonl: one record per Page on stdout",
|
|
69
69
|
)
|
|
70
|
+
crawl.add_argument(
|
|
71
|
+
"--summary-json", dest="summary_json", action="store_true",
|
|
72
|
+
help="Print a machine-readable Outcome summary (files mode: to stdout; "
|
|
73
|
+
"jsonl mode: already carried by the final Manifest record)",
|
|
74
|
+
)
|
|
70
75
|
crawl.add_argument(
|
|
71
76
|
"--selector", metavar="CSS",
|
|
72
77
|
help="CSS selector for the content container (overrides auto-detection)",
|
|
@@ -146,6 +151,7 @@ def parse_args(argv: list[str] | None = None) -> CrawlConfig | ServeConfig:
|
|
|
146
151
|
depth=args.depth,
|
|
147
152
|
limit=args.limit,
|
|
148
153
|
format=args.format,
|
|
154
|
+
summary_json=args.summary_json,
|
|
149
155
|
keep_html=args.keep_html,
|
|
150
156
|
sitemap=args.sitemap,
|
|
151
157
|
selector=args.selector,
|
|
@@ -195,7 +201,11 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
195
201
|
if config.format == "files" and config.clone_source and not config.resume and config.seeds:
|
|
196
202
|
from getdocs.source import clone_source_for
|
|
197
203
|
|
|
198
|
-
|
|
204
|
+
clone = clone_source_for(config)
|
|
205
|
+
if clone is not None:
|
|
206
|
+
print(clone.stderr_line(), file=sys.stderr, flush=True)
|
|
207
|
+
if config.summary_json:
|
|
208
|
+
print(json.dumps(clone.summary_json()), flush=True)
|
|
199
209
|
return 0
|
|
200
210
|
state_file = state_file_for(config)
|
|
201
211
|
if config.resume:
|
|
@@ -212,9 +222,14 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
212
222
|
)
|
|
213
223
|
state_file.unlink()
|
|
214
224
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
225
|
+
outcome = run_crawl(config)
|
|
226
|
+
# Always-on one-line summary on stderr (stdout belongs to the jsonl stream,
|
|
227
|
+
# ADR-0002). The opt-in --summary-json object goes to stdout in files mode;
|
|
228
|
+
# in jsonl mode the final Manifest record already carries the same facts.
|
|
229
|
+
print(outcome.stderr_line(), file=sys.stderr, flush=True)
|
|
230
|
+
if config.summary_json and config.format == "files":
|
|
231
|
+
print(json.dumps(outcome.summary_json()), flush=True)
|
|
232
|
+
if outcome.status == "empty":
|
|
218
233
|
print("error: no Pages produced — seed(s) unreachable?", file=sys.stderr, flush=True)
|
|
219
234
|
return 1
|
|
220
235
|
return 0
|
|
@@ -15,6 +15,7 @@ class CrawlConfig:
|
|
|
15
15
|
depth: int = 0 # link-hops from any seed; 0 = unlimited
|
|
16
16
|
limit: int = 1000 # max Pages per Crawl; 0 = unlimited
|
|
17
17
|
format: str = "files" # "files" or "jsonl"
|
|
18
|
+
summary_json: bool = False # emit a machine-readable Outcome summary object
|
|
18
19
|
keep_html: bool = False
|
|
19
20
|
sitemap: str = "both" # "both", "off" (--no-sitemap), or "only" (--sitemap-only)
|
|
20
21
|
selector: str | None = None # CSS selector naming the content container
|
|
@@ -25,6 +25,7 @@ from getdocs.config import CrawlConfig
|
|
|
25
25
|
from getdocs.extract import extract_page, is_shell
|
|
26
26
|
from getdocs.identity import build_user_agent
|
|
27
27
|
from getdocs.navharvest import harvest_nav, merge_harvests
|
|
28
|
+
from getdocs.outcome import CrawlOutcome
|
|
28
29
|
from getdocs.output import AssetStore, FileTreeWriter, JsonlWriter, PageRecord, relink_pages
|
|
29
30
|
from getdocs.scope import Scope
|
|
30
31
|
from getdocs.sitemap import parse_robots_sitemaps, parse_sitemap_xml
|
|
@@ -343,8 +344,8 @@ class _CrawlSpider(scrapy.Spider):
|
|
|
343
344
|
)
|
|
344
345
|
|
|
345
346
|
|
|
346
|
-
def run_crawl(config: CrawlConfig) -> int:
|
|
347
|
-
"""Run a Crawl to completion; returns the
|
|
347
|
+
def run_crawl(config: CrawlConfig) -> CrawlOutcome:
|
|
348
|
+
"""Run a Crawl to completion; returns the structured Outcome it produced."""
|
|
348
349
|
if config.format == "jsonl":
|
|
349
350
|
writer = JsonlWriter(sys.stdout)
|
|
350
351
|
else:
|
|
@@ -405,7 +406,7 @@ def run_crawl(config: CrawlConfig) -> int:
|
|
|
405
406
|
if isinstance(writer, FileTreeWriter):
|
|
406
407
|
relink_pages(writer, outcome["crawl_sequence"])
|
|
407
408
|
nav, reading_order = merge_harvests(outcome["harvests"], outcome["crawl_sequence"])
|
|
408
|
-
writer.write_manifest(
|
|
409
|
+
manifest_path = writer.write_manifest(
|
|
409
410
|
seeds=config.seeds,
|
|
410
411
|
errors=outcome["errors"],
|
|
411
412
|
truncated=outcome["truncated"],
|
|
@@ -415,4 +416,10 @@ def run_crawl(config: CrawlConfig) -> int:
|
|
|
415
416
|
reading_order=reading_order,
|
|
416
417
|
media_skipped=outcome["media_skipped"],
|
|
417
418
|
)
|
|
418
|
-
return
|
|
419
|
+
return CrawlOutcome(
|
|
420
|
+
pages=writer.page_count,
|
|
421
|
+
output_dir=config.output_dir,
|
|
422
|
+
manifest=manifest_path,
|
|
423
|
+
truncated=outcome["truncated"],
|
|
424
|
+
format=config.format,
|
|
425
|
+
)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Outcome: the structured result of a getdocs run — a Crawl or a Clone (ADR-0007).
|
|
2
|
+
|
|
3
|
+
A run produces exactly one Outcome — a Crawl or a Clone. The always-on stderr
|
|
4
|
+
summary line and the opt-in --summary-json object are both rendered from the
|
|
5
|
+
same Outcome value, so the two surfaces cannot disagree.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
|
|
13
|
+
class CrawlOutcome:
|
|
14
|
+
"""What a Crawl produced: how many Pages, where they landed, whether capped."""
|
|
15
|
+
|
|
16
|
+
pages: int
|
|
17
|
+
output_dir: Path
|
|
18
|
+
manifest: Path | None # crawl.json path in files mode; None in jsonl mode
|
|
19
|
+
truncated: bool
|
|
20
|
+
format: str = "files" # "files" or "jsonl"
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def status(self) -> str:
|
|
24
|
+
"""ok | truncated | empty — derived so the line and JSON always agree."""
|
|
25
|
+
if self.pages == 0:
|
|
26
|
+
return "empty"
|
|
27
|
+
if self.truncated:
|
|
28
|
+
return "truncated"
|
|
29
|
+
return "ok"
|
|
30
|
+
|
|
31
|
+
def stderr_line(self) -> str:
|
|
32
|
+
if self.format == "jsonl":
|
|
33
|
+
dest = "stdout (jsonl)"
|
|
34
|
+
else:
|
|
35
|
+
dest = f"{self.output_dir} ({self.manifest.name})"
|
|
36
|
+
note = " [truncated]" if self.truncated else ""
|
|
37
|
+
return f"getdocs: crawled {self.pages} Pages → {dest}{note}"
|
|
38
|
+
|
|
39
|
+
def summary_json(self) -> dict:
|
|
40
|
+
return {
|
|
41
|
+
"outcome": "crawled",
|
|
42
|
+
"status": self.status,
|
|
43
|
+
"pages": self.pages,
|
|
44
|
+
"output_dir": str(self.output_dir),
|
|
45
|
+
"manifest": str(self.manifest),
|
|
46
|
+
"truncated": self.truncated,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
|
|
51
|
+
class CloneOutcome:
|
|
52
|
+
"""What a Clone produced: the source repo, where it landed, its serve config.
|
|
53
|
+
|
|
54
|
+
A Clone is a sibling of a Crawl, not a kind of Crawl (CONTEXT.md): it carries
|
|
55
|
+
no Pages and no Manifest, so the summary omits those keys entirely.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
repo: str # source-repo identity, e.g. "acme/docs"
|
|
59
|
+
output_dir: Path # where the clone landed, e.g. ./out/docs
|
|
60
|
+
mkdocs_config: Path | None # generated/own serve config; None if none was written
|
|
61
|
+
|
|
62
|
+
status: str = "ok" # producing a Clone at all means it succeeded
|
|
63
|
+
|
|
64
|
+
def stderr_line(self) -> str:
|
|
65
|
+
cfg = f" ({self.mkdocs_config.name})" if self.mkdocs_config else ""
|
|
66
|
+
return f"getdocs: cloned {self.repo} → {self.output_dir}{cfg}"
|
|
67
|
+
|
|
68
|
+
def summary_json(self) -> dict:
|
|
69
|
+
return {
|
|
70
|
+
"outcome": "cloned",
|
|
71
|
+
"status": self.status,
|
|
72
|
+
"repo": self.repo,
|
|
73
|
+
"output_dir": str(self.output_dir),
|
|
74
|
+
"mkdocs_config": str(self.mkdocs_config) if self.mkdocs_config else None,
|
|
75
|
+
}
|
|
@@ -24,6 +24,7 @@ from bs4 import BeautifulSoup
|
|
|
24
24
|
|
|
25
25
|
from getdocs.config import CrawlConfig
|
|
26
26
|
from getdocs.identity import build_user_agent
|
|
27
|
+
from getdocs.outcome import CloneOutcome
|
|
27
28
|
|
|
28
29
|
# Hosts whose /ORG/REPO paths we recognize as clonable repositories.
|
|
29
30
|
_GIT_HOSTS = {"github.com", "gitlab.com", "bitbucket.org", "codeberg.org"}
|
|
@@ -191,12 +192,17 @@ def write_mkdocs_config(output_dir: Path, docs_dir: Path, site_name: str) -> Pat
|
|
|
191
192
|
return path
|
|
192
193
|
|
|
193
194
|
|
|
194
|
-
def clone_source_for(config: CrawlConfig) -> Path | None:
|
|
195
|
+
def _repo_identity(repo_url: str) -> str:
|
|
196
|
+
"""Short owner/repo identity for a canonical repo URL ("acme/docs")."""
|
|
197
|
+
return "/".join(s for s in urlsplit(repo_url).path.split("/") if s)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def clone_source_for(config: CrawlConfig) -> CloneOutcome | None:
|
|
195
201
|
"""Try to satisfy a crawl by cloning the docs' source repo instead.
|
|
196
202
|
|
|
197
|
-
Returns
|
|
198
|
-
|
|
199
|
-
Progress
|
|
203
|
+
Returns a CloneOutcome when the site is open-source and was cloned (the
|
|
204
|
+
caller should then skip crawling and report it); None to fall back to
|
|
205
|
+
crawling. Progress is reported on stderr (stdout is the jsonl stream).
|
|
200
206
|
"""
|
|
201
207
|
if not config.seeds:
|
|
202
208
|
return None
|
|
@@ -220,19 +226,20 @@ def clone_source_for(config: CrawlConfig) -> Path | None:
|
|
|
220
226
|
if repo_dir is None:
|
|
221
227
|
print("clone failed (git missing or repo unreachable) — crawling instead", file=sys.stderr)
|
|
222
228
|
return None
|
|
229
|
+
repo = _repo_identity(repo_url)
|
|
223
230
|
|
|
224
231
|
own_config = repo_dir / "mkdocs.yml"
|
|
225
232
|
if own_config.exists():
|
|
226
233
|
print(f"cloned to {repo_dir} (ships its own mkdocs.yml)", file=sys.stderr)
|
|
227
234
|
print(f"serve it with: mkdocs serve -f {own_config}", file=sys.stderr)
|
|
228
|
-
return repo_dir
|
|
235
|
+
return CloneOutcome(repo=repo, output_dir=repo_dir, mkdocs_config=own_config)
|
|
229
236
|
|
|
230
237
|
docs_dir = find_docs_dir(repo_dir)
|
|
231
238
|
if docs_dir is None:
|
|
232
239
|
print(f"cloned to {repo_dir}, but found no markdown docs to serve", file=sys.stderr)
|
|
233
|
-
return repo_dir
|
|
240
|
+
return CloneOutcome(repo=repo, output_dir=repo_dir, mkdocs_config=None)
|
|
234
241
|
|
|
235
242
|
written = write_mkdocs_config(config.output_dir, docs_dir, host)
|
|
236
243
|
print(f"cloned to {repo_dir}; wrote {written}", file=sys.stderr)
|
|
237
244
|
print(f"serve it with: mkdocs serve -f {written}", file=sys.stderr)
|
|
238
|
-
return repo_dir
|
|
245
|
+
return CloneOutcome(repo=repo, output_dir=repo_dir, mkdocs_config=written)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: getdocs
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Documentation crawler: recursively crawl a docs site and emit clean markdown
|
|
5
5
|
Author-email: jonbakerfish <jonbakerfish@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -113,6 +113,54 @@ Sitemap discovery, JavaScript rendering, source-repo cloning, polite
|
|
|
113
113
|
throttling, JSONL output, and resumable crawls are all built in — see
|
|
114
114
|
[docs/USAGE.md](docs/USAGE.md).
|
|
115
115
|
|
|
116
|
+
## Use with your agent
|
|
117
|
+
|
|
118
|
+
getdocs is built to be driven by a coding agent: it's an ordinary CLI whose
|
|
119
|
+
`out/` tree + `crawl.json` Manifest *is* the return value (no MCP server, no job
|
|
120
|
+
protocol — see [ADR-0007](docs/adr/0007-agent-integration-is-the-cli-not-an-mcp-surface.md)).
|
|
121
|
+
Two patterns cover most uses.
|
|
122
|
+
|
|
123
|
+
**Synchronous — fetch one docs section.** Scope defaults to the seed's host +
|
|
124
|
+
path prefix, so pointing at a subtree fetches just that subtree. This blocks
|
|
125
|
+
until done and works under any agent:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
getdocs crawl https://example.com/docs/auth -o ./out --summary-json
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
**Background — mirror a whole site.** Kick the crawl off as a background task
|
|
132
|
+
and keep working. Under **Claude Code** the agent is resumed automatically when
|
|
133
|
+
the crawl finishes; every other agent surveyed blocks or polls the output path
|
|
134
|
+
instead (this is a harness feature, not a getdocs one):
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
getdocs crawl https://example.com/docs -o ./out --summary-json &
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Read the summary, branch on the Outcome.** Every run ends with a one-line
|
|
141
|
+
summary on stderr; `--summary-json` adds a machine-readable object discriminated
|
|
142
|
+
by `outcome`. A run produces exactly one Outcome — a Crawl or a Clone:
|
|
143
|
+
|
|
144
|
+
```jsonc
|
|
145
|
+
// outcome: "crawled" — getdocs scraped the rendered site
|
|
146
|
+
{ "outcome": "crawled", "status": "ok", "pages": 42,
|
|
147
|
+
"output_dir": "./out", "manifest": "./out/crawl.json", "truncated": false }
|
|
148
|
+
|
|
149
|
+
// outcome: "cloned" — the docs were open-source, so getdocs cloned the repo
|
|
150
|
+
// (no pages / no manifest: a Clone is not a Crawl)
|
|
151
|
+
{ "outcome": "cloned", "status": "ok", "repo": "acme/docs",
|
|
152
|
+
"output_dir": "./out/docs", "mkdocs_config": "./out/mkdocs.yml" }
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Have the agent switch on `outcome`:
|
|
156
|
+
|
|
157
|
+
- **`crawled`** → grep and read the Pages under `output_dir` and follow the nav
|
|
158
|
+
/ reading order in `manifest` (`crawl.json`).
|
|
159
|
+
- **`cloned`** → you have the original markdown source; serve it with
|
|
160
|
+
`mkdocs serve -f <mkdocs_config>` (or just read the files under `output_dir`).
|
|
161
|
+
- **`status: "truncated"`** → the crawl hit its page cap; re-run with a higher
|
|
162
|
+
`--limit` (or `0` for unlimited) to get the rest.
|
|
163
|
+
|
|
116
164
|
## Install
|
|
117
165
|
|
|
118
166
|
Requires **Python 3.12+**.
|
|
@@ -11,6 +11,7 @@ src/getdocs/extract.py
|
|
|
11
11
|
src/getdocs/identity.py
|
|
12
12
|
src/getdocs/jobs.py
|
|
13
13
|
src/getdocs/navharvest.py
|
|
14
|
+
src/getdocs/outcome.py
|
|
14
15
|
src/getdocs/output.py
|
|
15
16
|
src/getdocs/scope.py
|
|
16
17
|
src/getdocs/sitemap.py
|
|
@@ -46,6 +47,8 @@ tests/test_shell_detection.py
|
|
|
46
47
|
tests/test_sitemap.py
|
|
47
48
|
tests/test_sitemap_e2e.py
|
|
48
49
|
tests/test_source.py
|
|
50
|
+
tests/test_summary_clone_e2e.py
|
|
51
|
+
tests/test_summary_e2e.py
|
|
49
52
|
tests/test_traversal_e2e.py
|
|
50
53
|
tests/test_urlnorm.py
|
|
51
54
|
tests/test_webhook_api.py
|
|
@@ -170,7 +170,9 @@ def test_clone_source_for_clones_and_writes_config(tmp_path, monkeypatch):
|
|
|
170
170
|
config = CrawlConfig(seeds=["https://docs.acme.io/intro"], output_dir=out)
|
|
171
171
|
result = source.clone_source_for(config)
|
|
172
172
|
|
|
173
|
-
assert result ==
|
|
173
|
+
assert result.repo == "acme/docs"
|
|
174
|
+
assert result.output_dir == out / "docs"
|
|
175
|
+
assert result.mkdocs_config == out / "mkdocs.yml"
|
|
174
176
|
assert (out / "mkdocs.yml").exists()
|
|
175
177
|
|
|
176
178
|
|
|
@@ -198,6 +200,9 @@ def test_clone_source_for_uses_repos_own_mkdocs_yml(tmp_path, monkeypatch):
|
|
|
198
200
|
monkeypatch.setattr(source, "clone_repo", fake_clone)
|
|
199
201
|
config = CrawlConfig(seeds=["https://docs.acme.io/"], output_dir=out)
|
|
200
202
|
|
|
201
|
-
|
|
203
|
+
result = source.clone_source_for(config)
|
|
204
|
+
assert result.output_dir == out / "docs"
|
|
205
|
+
# The repo's own config is used as the serve config, not one we generate.
|
|
206
|
+
assert result.mkdocs_config == out / "docs" / "mkdocs.yml"
|
|
202
207
|
# We don't overwrite a repo that already ships its own config.
|
|
203
208
|
assert not (out / "mkdocs.yml").exists()
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Clone completion summary (#22): the source-first Outcome report.
|
|
2
|
+
|
|
3
|
+
When a run clones the docs' source repo instead of crawling (ADR-0006), it
|
|
4
|
+
reports a `cloned` Outcome — distinct from a Crawl — so an agent can branch on
|
|
5
|
+
it. Verified at the in-process cli.main() seam with the source-first
|
|
6
|
+
orchestrator's network/git collaborators stubbed (prior art: test_source.py
|
|
7
|
+
monkeypatches fetch_html and clone_repo).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
|
|
12
|
+
from getdocs import cli, source
|
|
13
|
+
|
|
14
|
+
EDIT_LINK_HTML = (
|
|
15
|
+
'<a href="https://github.com/acme/docs/edit/main/p.md" '
|
|
16
|
+
'title="Edit this page">edit</a>'
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def fake_clone_with_docs(tmp_path):
|
|
21
|
+
"""A clone_repo stand-in that fabricates a repo dir holding markdown docs."""
|
|
22
|
+
def _clone(repo_url, dest_parent, timeout=180.0):
|
|
23
|
+
repo = dest_parent / "docs"
|
|
24
|
+
(repo / "docs").mkdir(parents=True)
|
|
25
|
+
(repo / "docs" / "index.md").write_text("# Home")
|
|
26
|
+
return repo
|
|
27
|
+
return _clone
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_clone_prints_one_line_stderr_summary(tmp_path, monkeypatch, capsys):
|
|
31
|
+
out = tmp_path / "out"
|
|
32
|
+
monkeypatch.setattr(source, "fetch_html", lambda url, ua=None: EDIT_LINK_HTML)
|
|
33
|
+
monkeypatch.setattr(source, "clone_repo", fake_clone_with_docs(tmp_path))
|
|
34
|
+
|
|
35
|
+
rc = cli.main(["crawl", "https://docs.acme.io/", "-o", str(out)])
|
|
36
|
+
|
|
37
|
+
assert rc == 0
|
|
38
|
+
err = capsys.readouterr().err
|
|
39
|
+
# Names the source repo and the cloned / serve-config locations.
|
|
40
|
+
assert "getdocs: cloned acme/docs" in err
|
|
41
|
+
assert str(out / "docs") in err
|
|
42
|
+
assert "mkdocs.yml" in err
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_clone_summary_json_emits_cloned_object(tmp_path, monkeypatch, capsys):
|
|
46
|
+
out = tmp_path / "out"
|
|
47
|
+
monkeypatch.setattr(source, "fetch_html", lambda url, ua=None: EDIT_LINK_HTML)
|
|
48
|
+
monkeypatch.setattr(source, "clone_repo", fake_clone_with_docs(tmp_path))
|
|
49
|
+
|
|
50
|
+
rc = cli.main(["crawl", "https://docs.acme.io/", "-o", str(out), "--summary-json"])
|
|
51
|
+
|
|
52
|
+
assert rc == 0
|
|
53
|
+
summary = json.loads(capsys.readouterr().out)
|
|
54
|
+
assert summary["outcome"] == "cloned"
|
|
55
|
+
assert summary["status"] == "ok"
|
|
56
|
+
assert summary["repo"] == "acme/docs"
|
|
57
|
+
assert summary["output_dir"] == str(out / "docs")
|
|
58
|
+
assert summary["mkdocs_config"] == str(out / "mkdocs.yml")
|
|
59
|
+
# A Clone has no Pages and no Manifest (CONTEXT.md): those keys are absent.
|
|
60
|
+
assert "pages" not in summary
|
|
61
|
+
assert "manifest" not in summary
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_clone_stderr_line_and_json_carry_the_same_facts(tmp_path, monkeypatch, capsys):
|
|
65
|
+
out = tmp_path / "out"
|
|
66
|
+
monkeypatch.setattr(source, "fetch_html", lambda url, ua=None: EDIT_LINK_HTML)
|
|
67
|
+
monkeypatch.setattr(source, "clone_repo", fake_clone_with_docs(tmp_path))
|
|
68
|
+
|
|
69
|
+
rc = cli.main(["crawl", "https://docs.acme.io/", "-o", str(out), "--summary-json"])
|
|
70
|
+
|
|
71
|
+
assert rc == 0
|
|
72
|
+
captured = capsys.readouterr()
|
|
73
|
+
summary = json.loads(captured.out)
|
|
74
|
+
assert f"cloned {summary['repo']}" in captured.err
|
|
75
|
+
assert summary["output_dir"] in captured.err
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_agent_can_branch_on_outcome_clone_vs_crawl(tmp_path, monkeypatch, capsys):
|
|
79
|
+
# A cloned run is discriminable from a crawled run purely by `outcome`.
|
|
80
|
+
out = tmp_path / "out"
|
|
81
|
+
monkeypatch.setattr(source, "fetch_html", lambda url, ua=None: EDIT_LINK_HTML)
|
|
82
|
+
monkeypatch.setattr(source, "clone_repo", fake_clone_with_docs(tmp_path))
|
|
83
|
+
|
|
84
|
+
cli.main(["crawl", "https://docs.acme.io/", "-o", str(out), "--summary-json"])
|
|
85
|
+
cloned = json.loads(capsys.readouterr().out)
|
|
86
|
+
|
|
87
|
+
assert cloned["outcome"] == "cloned"
|
|
88
|
+
assert cloned["outcome"] != "crawled"
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Completion summary: the agent-native Outcome report (#21).
|
|
2
|
+
|
|
3
|
+
A finished run reports what it produced — an always-on one-line summary on
|
|
4
|
+
stderr, and an opt-in structured object via --summary-json. Both surfaces are
|
|
5
|
+
rendered from the same Outcome so they cannot disagree (ADR-0007). Verified at
|
|
6
|
+
the subprocess CLI seam (prior art: test_crawl_e2e, test_limits_errors_e2e).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
FIXTURE_HTML = (Path(__file__).parent / "fixtures" / "basic_docs_page.html").read_text()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def page(title, body):
|
|
18
|
+
return f"<html><head><title>{title}</title></head><body><main>{body}</main></body></html>"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_getdocs(*args):
|
|
22
|
+
return subprocess.run(
|
|
23
|
+
[sys.executable, "-m", "getdocs", *args, "--delay", "0"],
|
|
24
|
+
capture_output=True, text=True, timeout=120,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_crawl_prints_one_line_stderr_summary(site, tmp_path):
|
|
29
|
+
site.add("/docs/auth", FIXTURE_HTML)
|
|
30
|
+
seed = f"{site.url}/docs/auth"
|
|
31
|
+
|
|
32
|
+
result = run_getdocs("crawl", seed, "-o", str(tmp_path), "--no-clone-source")
|
|
33
|
+
|
|
34
|
+
assert result.returncode == 0, result.stderr
|
|
35
|
+
# Names the page count and where the Pages landed.
|
|
36
|
+
assert "getdocs: crawled 1 Pages" in result.stderr
|
|
37
|
+
assert str(tmp_path) in result.stderr
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_summary_json_emits_crawled_object_to_stdout(site, tmp_path):
    """With --summary-json, stdout is a single JSON object describing the crawl."""
    site.add("/docs/auth", FIXTURE_HTML)
    out_dir = str(tmp_path)

    result = run_getdocs(
        "crawl", f"{site.url}/docs/auth", "-o", out_dir,
        "--no-clone-source", "--summary-json",
    )

    assert result.returncode == 0, result.stderr
    summary = json.loads(result.stdout)
    expected = {
        "outcome": "crawled",
        "status": "ok",
        "pages": 1,
        "output_dir": out_dir,
        "manifest": str(tmp_path / "crawl.json"),
    }
    # Compare only the pinned keys; a missing key still fails with KeyError.
    assert {key: summary[key] for key in expected} == expected
    assert summary["truncated"] is False
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_jsonl_summary_emits_no_stdout_object_but_keeps_stderr_line(site):
    """In jsonl mode --summary-json adds nothing to stdout; stderr still gets the line."""
    home_html = page("Home", '<h1>Home</h1><a href="/docs/auth">Auth</a>')
    auth_html = page("Auth", "<h1>Auth</h1>")
    site.add("/docs/", home_html)
    site.add("/docs/auth", auth_html)

    result = run_getdocs(
        "crawl", f"{site.url}/docs/", "--format", "jsonl",
        "--summary-json", "--no-clone-source",
    )

    assert result.returncode == 0, result.stderr
    records = [json.loads(line) for line in result.stdout.strip().split("\n")]
    # stdout stays the page stream: Page records + the final Manifest, nothing else.
    final_record = records[-1]
    assert final_record["type"] == "manifest"
    assert {record["type"] for record in records} <= {"page", "manifest"}
    # The stderr line is still emitted in jsonl mode.
    assert "getdocs: crawled 2 Pages" in result.stderr
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_truncated_crawl_reports_truncated_status(site, tmp_path):
    """A crawl run with --limit 2 over a longer page chain reports truncation."""
    site.add("/docs/", page("Home", '<h1>Home</h1><a href="/docs/p0">start</a>'))
    # Chain p0 -> p1 -> ... -> p5; the last page links nowhere.
    last = 5
    for i in range(last + 1):
        if i < last:
            link = f'<a href="/docs/p{i + 1}">next</a>'
        else:
            link = ""
        site.add(f"/docs/p{i}", page(f"P{i}", f"<h1>P{i}</h1>{link}"))

    result = run_getdocs(
        "crawl", f"{site.url}/docs/", "-o", str(tmp_path),
        "--limit", "2", "--summary-json", "--no-clone-source",
    )

    assert result.returncode == 0, result.stderr
    summary = json.loads(result.stdout)
    # Both surfaces must flag the truncation: the JSON object and the stderr line.
    assert summary["status"] == "truncated"
    assert summary["truncated"] is True
    assert "[truncated]" in result.stderr
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_empty_crawl_reports_empty_status_and_exits_nonzero(tmp_path):
    """A crawl that produces no Pages exits 1 and reports status ``empty``.

    The seed points at port 1 on the loopback address, where the test expects
    nothing to be served, so the run ends with zero pages.
    """
    result = run_getdocs(
        "crawl", "http://127.0.0.1:1/none", "-o", str(tmp_path),
        "--summary-json", "--no-clone-source",
    )

    # Attach stderr so an unexpected exit code shows the CLI's own diagnostics,
    # matching the other tests in this module.
    assert result.returncode == 1, result.stderr
    summary = json.loads(result.stdout)
    assert summary["outcome"] == "crawled"
    assert summary["status"] == "empty"
    assert summary["pages"] == 0
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_stderr_line_and_json_carry_the_same_facts(site, tmp_path):
    """The stderr line and the JSON object agree on page count and truncation."""
    site.add("/docs/auth", FIXTURE_HTML)

    result = run_getdocs(
        "crawl", f"{site.url}/docs/auth", "-o", str(tmp_path),
        "--no-clone-source", "--summary-json",
    )

    assert result.returncode == 0, result.stderr
    summary = json.loads(result.stdout)
    stderr = result.stderr
    # The page count named on stderr is the one carried in the JSON object.
    page_count = summary["pages"]
    assert f"crawled {page_count} Pages" in stderr
    # The truncation marker appears on stderr exactly when the JSON flag is set.
    marker_present = "[truncated]" in stderr
    assert marker_present == summary["truncated"]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_resume_run_produces_a_crawled_summary(site, tmp_path):
    """A --resume run with --summary-json reports outcome ``crawled`` on both surfaces."""
    out_dir = str(tmp_path)
    site.add("/docs/", page("Home", '<h1>Home</h1><a href="/docs/p0">start</a>'))
    # Chain p0 -> p1 -> p2 -> p3; the last page links nowhere.
    last = 3
    for i in range(last + 1):
        if i < last:
            link = f'<a href="/docs/p{i + 1}">next</a>'
        else:
            link = ""
        site.add(f"/docs/p{i}", page(f"P{i}", f"<h1>P{i}</h1>{link}"))

    # First run stops early because of --limit.
    first = run_getdocs(
        "crawl", f"{site.url}/docs/", "-o", out_dir,
        "--limit", "2", "--no-clone-source",
    )
    assert first.returncode == 0, first.stderr

    # Second run resumes from the same output directory.
    second = run_getdocs("crawl", "--resume", "-o", out_dir, "--summary-json")

    assert second.returncode == 0, second.stderr
    summary = json.loads(second.stdout)
    assert summary["outcome"] == "crawled"
    assert "getdocs: crawled" in second.stderr
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|