osc-fast-export 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
+ Metadata-Version: 2.4
2
+ Name: osc_fast_export
3
+ Version: 1.0.0
4
+ Summary: Generate git-fast-import compatible text stream from OSC history
5
+ Author-email: Matěj Cepl <mcepl@cepl.eu>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://codeberg.org/mcepl/osc-fast-export
8
+ Project-URL: repository, https://codeberg.org/mcepl/osc-fast-export.git
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Information Technology
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
+ Classifier: Topic :: Software Development :: Version Control
15
+ Classifier: Topic :: Software Development :: Version Control :: Git
16
+ Requires-Python: >=3.6
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE.txt
19
+ Dynamic: license-file
20
+
21
+ OSC (openSUSE build service VCS tool) fast-export compatible with
22
+ `git fast-import(1)`.
23
+
24
+ Just run this inside the OBS package checkout; its output
25
+ should be piped to `git fast-import(1)`.
26
+
27
+ It is useful to prepare an authors file (by default
28
+ `.osc/authorsfile.txt`), which is a one-to-one mapping between logins
29
+ (used in osc log) and full names+emails. E.g., one line of the
30
+ file could be:
31
+
32
+ ```
33
+ mcepl = Matěj Cepl <mcepl@cepl.eu>
34
+ ```
35
+
36
+ Cooperation with [git lfs](https://git-lfs.com/) is
37
+ rather complicated: `git fast-import` can import only into a
38
+ completely empty branch, while `git lfs` doesn’t work without
39
+ `.gitattributes`. The only solution I came up with is to put the
40
+ content of `.gitattributes` into `.git/info/attributes`, which is
41
+ read as well, but it doesn’t count as content of the repository
42
+ itself.
43
+
44
+ All issues, questions, complaints, or (even better!) patches
45
+ should be sent by email to the
46
+ [~mcepl/devel@lists.sr.ht](mailto:~mcepl/devel@lists.sr.ht) mailing
47
+ list (for patches use [git send-email](https://git-send-email.io/)).
@@ -0,0 +1,7 @@
1
+ osc_fast_export.py,sha256=ad-zOQNpPHNfSvSUQBwUhZ0Wj-ei_R2GZcEMBE6h7iY,12086
2
+ osc_fast_export-1.0.0.dist-info/licenses/LICENSE.txt,sha256=BxnyvG4KNEUw5-M5Jz-VbB87dcW1fdwEaMPO70AegtE,1073
3
+ osc_fast_export-1.0.0.dist-info/METADATA,sha256=-d7zsQ6wFCDqtEAzqagsW5IhExoNT_-6ZMjrtcF8j2U,1876
4
+ osc_fast_export-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ osc_fast_export-1.0.0.dist-info/entry_points.txt,sha256=-B-2JonSiPM4KfTWk9V4BGYFk0bRqBbMfr8h2ne8620,57
6
+ osc_fast_export-1.0.0.dist-info/top_level.txt,sha256=7VKN28LmrR0k82aN2d9gKIdifX20DHB4xWfEu8u2iK8,16
7
+ osc_fast_export-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ osc-fast-export = osc_fast_export:main
@@ -0,0 +1,22 @@
1
+ Copyright 2022, Matěj Cepl mcepl at cepl dot eu
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or
8
+ sell copies of the Software, and to permit persons to whom
9
+ the Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ osc_fast_export
osc_fast_export.py ADDED
@@ -0,0 +1,400 @@
1
+ #!/usr/bin/python3
2
+ # https://git-scm.com/book/en/v2/Git-and-Other-Systems-Migrating-to-Git#_custom_importer
3
+
4
+ import argparse
5
+ import collections
6
+ import configparser
7
+ from datetime import datetime, timezone
8
+ import logging
9
+ import os.path
10
+ import pathlib
11
+ import pprint
12
+ import signal
13
+ from subprocess import CalledProcessError, PIPE, run
14
+ import sys
15
+ from typing import Dict, Iterator, List, Optional
16
+
17
+ import xml.etree.ElementTree as ET
18
+
19
# Diagnostics go through the logging module; stdout is reserved for the
# fast-import stream produced by this script.
logging.basicConfig(
    format="%(levelname)s:%(funcName)s:%(message)s",
    level=logging.INFO,
)
log = logging.getLogger("osc_fast_export")

# SIGPIPE is not defined on every platform (e.g. Windows), so only ignore
# it where it exists; otherwise merely importing this module would fail.
if hasattr(signal, "SIGPIPE"):
    signal.signal(signal.SIGPIPE, signal.SIG_IGN)
26
+
27
+
28
# Lets configparser read a file that has no section headers by
# prepending a synthetic one while the file is streamed.
# https://stackoverflow.com/a/2819788/164233
def FakeSecHead(fp):
    """Yield a fake ``[asection]`` header line, then every line of *fp*."""
    header = "[asection]\n"
    yield header
    for line in fp:
        yield line
33
+
34
+
35
class LogEntry(
    collections.namedtuple(
        "LogEntry",
        ["rev", "md5", "author", "date", "msg", "authors_map"],
    )
):
    """
    Represents a single log entry from the Open Build Service (OBS) history.

    Attributes:
        rev (int): The revision number of the entry.
        md5 (str): The source MD5 checksum of the package at this revision.
        author (str): The OBS username of the author.
        date (datetime): The date and time of the commit.
        msg (Optional[str]): The commit message; None or an empty string
            is treated as "no message".
        authors_map (Dict[str, str]): Mapping of lower-cased OBS usernames
            to Git author identities, used only to render ``__str__``.
    """

    # Keep instances as light as the underlying tuple (no per-instance dict).
    __slots__ = ()

    def __str__(self):
        """Return a multi-line, human readable description of the entry."""
        # Fall back to the raw OBS login when no mapping is known.
        author_identity = self.authors_map.get(
            self.author.lower(), self.author
        )
        return (
            f"{self.rev}, {self.md5[:12]}, {author_identity},"
            f" {self.date.isoformat()}:\n{self.msg or ''}"
        )

    def log_entry(self):
        """Return a one-line summary: revision, ISO date, first message line."""
        # A log entry without a message must not break progress logging.
        summary = (self.msg or "").strip().split("\n")[0]
        return f"{self.rev} {self.date.isoformat()}: {summary}"
64
+
65
+
66
def get_authors() -> Dict[str, str]:
    """
    Reads the .osc/authorsfile.txt to get a mapping of OBS
    usernames to Git author identities.

    The authorsfile.txt is expected to be a simple key-value file
    where keys are OBS usernames and values are Git author
    strings (e.g., "Name <email@example.com>"). The file is read as
    UTF-8 and values are taken literally (no "%" interpolation).

    Returns:
        Dict[str, str]: A dictionary mapping OBS usernames (str) to Git author
        strings (str). Keys come back lower-cased, which is configparser's
        default option-name handling.

    Raises:
        FileNotFoundError: If `.osc/authorsfile.txt` does not exist.
    """
    authorsfile = pathlib.Path(".osc", "authorsfile.txt")
    if not authorsfile.exists():
        raise FileNotFoundError(
            "The .osc/authorsfile.txt file was not found."
        )

    # interpolation=None: a literal "%" in a name or address must not be
    # parsed as configparser interpolation syntax.
    config = configparser.ConfigParser(interpolation=None)
    # Author names are routinely non-ASCII, so do not depend on the locale.
    with open(authorsfile, "r", encoding="utf-8") as fp:
        config.read_file(FakeSecHead(fp))
    authors_map = dict(config.items("asection"))

    log.debug("authors:\n%s", pprint.pformat(authors_map))
    return authors_map
97
+
98
+
99
def osc_log(authors_map: Dict[str, str]) -> Iterator[LogEntry]:
    """
    Retrieves the commit history of the current OBS package
    checkout using 'osc log --xml'.

    This is a generator: the checkout check and the 'osc log' call only
    happen once iteration starts.

    Args:
        authors_map (Dict[str, str]): Author mapping stored on every
            yielded entry (used when an entry is rendered as text).

    Raises:
        SystemExit: If the script is not run from an OSC checkout.
        RuntimeError: If the 'osc log' command fails to execute.

    Yields:
        LogEntry: One entry per <logentry> element, in the reverse of the
        order printed by 'osc log' (presumably newest first, so entries
        come out oldest to newest -- the order itself is not verified).
        Dates are interpreted as UTC.
    """
    if not pathlib.Path(".osc").exists():
        print(
            "Must be run from the OSC checkout.", file=sys.stderr
        )
        sys.exit(9)
    try:
        log_pid = run(
            ["osc", "log", "--xml"],
            check=True,
            text=True,
            capture_output=True,
        )
    except CalledProcessError as exc:
        # str(exc) only carries the command and exit status; the actual
        # explanation from osc is in the captured stderr.
        raise RuntimeError(
            "Cannot collect log of the package!\n"
            f"Reported issue was:\n{exc}\n{exc.stderr}"
        ) from exc
    log.debug("log_pid = %s", log_pid)

    log_str = log_pid.stdout
    log.debug("log_str:\n%s", log_str)
    tree = ET.fromstring(log_str)
    log_list = [
        LogEntry(
            int(entry.attrib["revision"]),
            entry.attrib["srcmd5"],
            entry.findtext("author"),
            datetime.strptime(
                entry.findtext("date"), "%Y-%m-%d %H:%M:%S"
            ).replace(tzinfo=timezone.utc),
            # A missing <msg> element becomes an empty message, not None.
            entry.findtext("msg", default=""),
            authors_map,
        )
        for entry in tree.iter("logentry")
    ]
    for entry in reversed(log_list):
        log.info("%s", entry.log_entry())
        yield entry
150
+
151
+
152
def checkout_revision(rev: int):
    """
    Checks out a specific revision of the OBS package using 'osc
    up -e -r <rev>' and then runs 'osc clean' on the working copy.

    Args:
        rev (int): The revision number to check out.

    Raises:
        RuntimeError: If 'osc up' or 'osc clean' commands fail; the
            message includes the stderr output of the failed command.
    """
    steps = (
        (
            ["osc", "up", "-e", "-r", f"{rev}"],
            f"Cannot checkout revision {rev}!",
        ),
        (["osc", "clean"], "Cannot clean checkout!"),
    )
    for command, failure in steps:
        try:
            # text=True so that stderr ends up in the error message as
            # readable text instead of a bytes repr (b'...').
            run(command, capture_output=True, check=True, text=True)
        except CalledProcessError as exc:
            raise RuntimeError(
                f"{failure}\nReported issue was:\n{exc.stderr}"
            ) from exc
184
+
185
+
186
def _prompt(question: str) -> str:
    """Ask *question* on stderr and return the stripped answer from stdin."""
    # Prompts go to stderr; stdout carries the fast-import stream.
    sys.stderr.write(question)
    sys.stderr.flush()
    return input().strip()


def _handle_missing_author(
    obs_author: str,
    authors_map: Dict[str, str],
    non_interactive: bool,
) -> str:
    """
    Handles cases where an OBS author is not found in the authors map.

    If in non-interactive mode, it raises a ValueError.
    Otherwise, it prompts the user for the author's name and email
    (re-asking until the name is non-empty and the email contains "@"),
    updates the authors_map, and appends the new mapping to
    .osc/authorsfile.txt.

    Args:
        obs_author (str): The OBS username of the author.
        authors_map (Dict[str, str]): The dictionary of author mappings;
            updated in place under the lower-cased username.
        non_interactive (bool): Flag to disable interactive prompts.

    Returns:
        str: The resolved Git author string ("Name <email>").

    Raises:
        ValueError: If no author mapping is found in non-interactive mode.
    """
    if non_interactive:
        raise ValueError(
            f"No author mapping found for '{obs_author}' in non-interactive mode. "
            f"Please add a mapping to .osc/authorsfile.txt for '{obs_author.lower()}'."
        )

    log.warning(
        "No entry for %s in .osc/authorsfile.txt",
        obs_author,
    )

    name = ""
    while not name:
        name = _prompt(f"Enter name for user '{obs_author}': ")

    while True:
        answer = _prompt(f"Enter email for user '{obs_author}': ")
        # Tolerate "<user@host>": the angle brackets are added below.
        email = answer.strip("<>").strip()
        if "@" in email:
            break
        # Keep asking; an invalid address must never reach the mapping.
        log.warning(
            "Invalid email, please enter a valid email address."
        )

    author = f"{name} <{email}>"
    authors_map[obs_author.lower()] = author

    authorsfile = pathlib.Path(".osc", "authorsfile.txt")
    # Names are routinely non-ASCII, so do not depend on the locale.
    with open(authorsfile, "a", encoding="utf-8") as fp:
        fp.write(f"{obs_author.lower()} = {author}\n")
    log.info(
        "Mapping for %s saved to %s",
        obs_author,
        authorsfile,
    )
    return author
249
+
250
+
251
def write_line(s: str):
    """Write *s* plus a newline to stdout as UTF-8 bytes and flush."""
    line = s + "\n"
    out = sys.stdout.buffer
    out.write(line.encode("utf-8"))
    out.flush()
254
+
255
+
256
def write_binary(b: bytes):
    """Write the raw bytes *b* followed by one newline byte to stdout and flush."""
    stream = sys.stdout.buffer
    for chunk in (b, b"\n"):
        stream.write(chunk)
    stream.flush()
260
+
261
+
262
def _git_file_mode(st_mode: int) -> str:
    """Map a stat() mode of a file to a mode git fast-import accepts."""
    # fast-import only accepts 100644 and 100755 for regular files, so
    # reduce the permission bits to "owner-executable or not".
    return "100755" if st_mode & 0o100 else "100644"


def print_export(
    entry: LogEntry,
    authors_map: Dict[str, str],
    branch: str,
    previous_mark: Optional[int],
    non_interactive: bool,
) -> int:
    """
    Prints Git fast-export commands for a given log entry.

    This function generates the 'commit', 'mark', 'original-oid',
    'author', 'committer', 'data', 'from', 'deleteall', and 'M'
    (modify/add file) commands for git fast-import.
    It checks out the specified revision, then iterates through all files
    in the working directory (excluding .osc) to add them to the commit.

    Args:
        entry (LogEntry): The log entry object containing commit details.
        authors_map (Dict[str, str]): A dictionary mapping OBS
            usernames to Git author strings; may gain an entry when a
            missing author is resolved interactively.
        branch (str): The name of the Git branch to commit to.
        previous_mark (Optional[int]): The mark of the previous commit in the
            history, or None if this is the first commit.
        non_interactive (bool): If true, a missing author mapping is an
            error instead of an interactive prompt.

    Returns:
        int: The 'mark' generated for the current commit (which
        is the revision number).

    Raises:
        ValueError: If no Git author mapping is found for the OBS
            author in the log entry (non-interactive mode).
    """
    log.debug("entry:\n%s", str(entry))
    mark = entry.rev

    author = authors_map.get(entry.author.lower())
    if author is None:
        author = _handle_missing_author(
            entry.author, authors_map, non_interactive
        )
    log.debug("author = %s", author)

    checkout_revision(entry.rev)
    write_line(f"commit refs/heads/{branch}")
    write_line(f"mark :{mark}")
    if entry.md5:
        write_line(f"original-oid {entry.md5}")
    # The same identity and timestamp serve as author and committer.
    ts = int(entry.date.timestamp())
    write_line(f"author {author} {ts} +0000")
    write_line(f"committer {author} {ts} +0000")
    # The length announced by 'data' is in bytes, not characters.
    msg_bytes = (entry.msg or "").encode("utf-8")
    write_line(f"data {len(msg_bytes)}")
    write_binary(msg_bytes)
    # Chain onto the previous commit; only the first commit has no parent.
    if previous_mark is not None:
        write_line(f"from :{previous_mark}")

    # Create actual content of the commit
    # It is easier just to wipe out everything and include files again.
    write_line("deleteall")
    for dirpath, dirnames, filenames in os.walk("."):
        # TODO are osc notes (aka osc comment) a thing?
        dirpath = os.path.relpath(dirpath, ".")
        if dirpath == ".":
            # Prune the osc metadata directory (and only that one) so it
            # is neither exported nor walked.
            dirnames[:] = [d for d in dirnames if d != ".osc"]
        # Sorted traversal makes the generated stream reproducible.
        dirnames.sort()
        # It seems git-fast-export doesn't export directories at all
        # and git-fast-import just creates them when needed.
        for fn in sorted(filenames):
            fname = os.path.relpath(
                os.path.join(dirpath, fn), "."
            )
            log.debug("dirpath = %s, fname = %s", dirpath, fname)
            fstat = _git_file_mode(os.stat(fname).st_mode)
            write_line(f"M {fstat} inline {fname}")
            with open(fname, "rb") as inf:
                dt = inf.read()
            write_line(f"data {len(dt)}")
            write_binary(dt)
    write_line("")

    return mark
346
+
347
+
348
def main():
    """
    Main function to run the OSC to Git fast-export conversion.

    Parses command-line arguments, sets up logging, retrieves author mappings,
    and then iterates through the OBS log to generate Git fast-export commands
    for each revision, finishing the stream with a 'done' command.
    """
    # prog matches the installed console script name so that usage and
    # error messages show the command the user actually typed.
    parser = argparse.ArgumentParser(
        prog="osc-fast-export",
        description=(
            "Generate a git fast-import compatible stream from the "
            "history of an OSC package checkout."
        ),
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="print out debugging information",
    )
    parser.add_argument(
        "-b",
        "--branch",
        default="main",
        help="branch name to store commits in (default: main)",
    )
    parser.add_argument(
        "--non-interactive",
        action="store_true",
        help="disable interactive prompts for missing author information",
    )
    args = parser.parse_args()

    if args.debug:
        log.setLevel(logging.DEBUG)

    authors = get_authors()

    # Mark of the previously exported commit; None until the first one.
    current_mark = None

    for logentry in osc_log(authors):
        # Each commit is chained onto the previous one through its mark.
        current_mark = print_export(
            logentry,
            authors,
            args.branch,
            current_mark,
            args.non_interactive,
        )
    write_line("done")


if __name__ == "__main__":
    main()