numbers-parser 4.12.1__tar.gz → 4.13.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/PKG-INFO +109 -6
  2. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/README.md +108 -5
  3. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/pyproject.toml +2 -1
  4. numbers_parser-4.13.1/src/numbers_parser/_csv2numbers.py +416 -0
  5. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/cell.py +2 -0
  6. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/model.py +7 -0
  7. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/LICENSE.rst +0 -0
  8. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/__init__.py +0 -0
  9. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/_cat_numbers.py +0 -0
  10. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/_unpack_numbers.py +0 -0
  11. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/bullets.py +0 -0
  12. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/constants.py +0 -0
  13. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/containers.py +0 -0
  14. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/currencies.py +0 -0
  15. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/data/empty.numbers +0 -0
  16. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/document.py +0 -0
  17. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/exceptions.py +0 -0
  18. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/experimental.py +0 -0
  19. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/formula.py +0 -0
  20. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TNArchives_pb2.py +0 -0
  21. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TNArchives_sos_pb2.py +0 -0
  22. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TNCommandArchives_pb2.py +0 -0
  23. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TNCommandArchives_sos_pb2.py +0 -0
  24. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSAArchives_pb2.py +0 -0
  25. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSAArchives_sos_pb2.py +0 -0
  26. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSACommandArchives_sos_pb2.py +0 -0
  27. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCEArchives_pb2.py +0 -0
  28. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCH3DArchives_pb2.py +0 -0
  29. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCHArchives_Common_pb2.py +0 -0
  30. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCHArchives_GEN_pb2.py +0 -0
  31. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCHArchives_pb2.py +0 -0
  32. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCHArchives_sos_pb2.py +0 -0
  33. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCHCommandArchives_pb2.py +0 -0
  34. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCHPreUFFArchives_pb2.py +0 -0
  35. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCKArchives_pb2.py +0 -0
  36. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSCKArchives_sos_pb2.py +0 -0
  37. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSDArchives_pb2.py +0 -0
  38. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSDArchives_sos_pb2.py +0 -0
  39. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSDCommandArchives_pb2.py +0 -0
  40. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSKArchives_pb2.py +0 -0
  41. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSPArchiveMessages_pb2.py +0 -0
  42. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSPDatabaseMessages_pb2.py +0 -0
  43. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSPMessages_pb2.py +0 -0
  44. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSSArchives_pb2.py +0 -0
  45. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSSArchives_sos_pb2.py +0 -0
  46. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSTArchives_pb2.py +0 -0
  47. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSTArchives_sos_pb2.py +0 -0
  48. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSTCommandArchives_pb2.py +0 -0
  49. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSTStylePropertyArchiving_pb2.py +0 -0
  50. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSWPArchives_pb2.py +0 -0
  51. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSWPArchives_sos_pb2.py +0 -0
  52. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/TSWPCommandArchives_pb2.py +0 -0
  53. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/__init__.py +0 -0
  54. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/fontmap.py +0 -0
  55. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/functionmap.py +0 -0
  56. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/generated/mapping.py +0 -0
  57. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/iwafile.py +0 -0
  58. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/iwork.py +0 -0
  59. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/numbers_cache.py +0 -0
  60. {numbers_parser-4.12.1 → numbers_parser-4.13.1}/src/numbers_parser/numbers_uuid.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numbers-parser
3
- Version: 4.12.1
3
+ Version: 4.13.1
4
4
  Summary: Read and write Apple Numbers spreadsheets
5
5
  Home-page: https://github.com/masaccio/numbers-parser
6
6
  License: MIT
@@ -78,11 +78,11 @@ sudo apt-get -y install libsnappy-dev
78
78
  ```
79
79
 
80
80
  On Windows, you will need to either arrange for snappy to be found for VSC++ or you can install python
81
- binary libraries compiled by [Christoph Gohlke](https://www.lfd.uci.edu/~gohlke/pythonlibs/#python-snappy). You must select the correct python
82
- version for your installation. For example for python 3.11:
81
+ [pre-compiled binary libraries](https://github.com/cgohlke/win_arm64-wheels/) which are only available
82
+ for Windows on Arm. There appear to be no x86 pre-compiled packages for Windows.
83
83
 
84
84
  ```text
85
- pip install python_snappy-0.6.1-cp311-cp311-win_amd64.whl
85
+ pip install python_snappy-0.6.1-cp312-cp312-win_arm64.whl
86
86
  ```
87
87
 
88
88
  ## Quick Start
@@ -337,8 +337,15 @@ see the [full API docs](https://masaccio.github.io/numbers-parser/).
337
337
  ## Command-line scripts
338
338
 
339
339
  When installed from [PyPI](https://pypi.org/project/numbers-parser/),
340
- a command-like script `cat-numbers` is installed in Python’s scripts
341
- folder. This script dumps Numbers spreadsheets into Excel-compatible CSV
340
+ a number of command-line scripts are installed:
341
+
342
+ - `cat-numbers`: converts Numbers documents into CSV
343
+ - `csv2numbers`: converts CSV files to Numbers documents
344
+ - `unpack-numbers`: converts Numbers documents into JSON files for debug purposes
345
+
346
+ ### cat-numbers
347
+
348
+ This script dumps Numbers spreadsheets into Excel-compatible CSV
342
349
  format, iterating through all the spreadsheets passed on the
343
350
  command-line.
344
351
 
@@ -375,6 +382,102 @@ and `datetime.strftime`. Numbers in English locales displays 12-hour
375
382
  times with ‘am’ and ‘pm’, but `datetime.strftime` on macOS at least
376
383
  cannot return lower-case versions of AM/PM.
377
384
 
385
+ ### csv2numbers
386
+
387
+ This script converts Excel-compatible CSV files into Numbers documents. Output files
388
+ can optionally be provided, but if none are provided, the output is created by replacing
389
+ the input file’s suffix with .numbers. For example:
390
+
391
+ ```text
392
+ csv2numbers file1.csv file2.csv -o file1.numbers file2.numbers
393
+ ```
394
+
395
+ Columns of data can have a number of transformations applied to them. The primary use-
396
+ case intended for `csv2numbers` is converting banking exports to well-formatted
397
+ spreadsheets.
398
+
399
+ ```text
400
+ usage: csv2numbers [-h] [-V] [--whitespace] [--reverse] [--no-header]
401
+ [--day-first] [--date COLUMNS] [--rename COLUMNS-MAP]
402
+ [--transform COLUMNS-MAP] [--delete COLUMNS]
403
+ [-o [FILENAME ...]]
404
+ [csvfile ...]
405
+
406
+ positional arguments:
407
+ csvfile CSV file to convert
408
+
409
+ options:
410
+ -h, --help show this help message and exit
411
+ -V, --version
412
+ --whitespace strip whitespace from beginning and end of strings
413
+ and collapse other whitespace into single space
414
+ (default: false)
415
+ --reverse reverse the order of the data rows (default:
416
+ false)
417
+ --no-header CSV file has no header row (default: false)
418
+ --day-first dates are represented day first in the CSV file
419
+ (default: false)
420
+ --date COLUMNS comma-separated list of column names/indexes to
421
+ parse as dates
422
+ --rename COLUMNS-MAP comma-separated list of column names/indexes to
423
+ renamed as 'OLD:NEW'
424
+ --transform COLUMNS-MAP
425
+ comma-separated list of column names/indexes to
426
+ transform as 'NEW:FUNC=OLD'
427
+ --delete COLUMNS comma-separated list of column names/indexes to
428
+ delete
429
+ -o [FILENAME ...], --output [FILENAME ...]
430
+ output filename (default: use source file with
431
+ .numbers)
432
+ ```
433
+
434
+ The following options affect the output of the entire file. The default for each is always false.
435
+
436
+ - `--whitespace`: strip whitespace from beginning and end of strings and collapse other whitespace into single space
437
+ - `--reverse`: reverse the order of the data rows
438
+ - `--no-header`: CSV file has no header row
439
+ - `--day-first`: dates are represented day first in the CSV file
440
+
441
+ `csv2numbers` can also perform column manipulation. Columns can be identified using their name if the CSV file has a header or using a column index. Columns are zero-indexed and names and indices can be used together on the same command-line. When multiple columns are required, you can specify them using comma-separated values. The format for these arguments, like for the CSV file itself, is the Excel dialect.
442
+
443
+ #### Deleting columns
444
+
445
+ Delete columns using `--delete`. The names or indices of the columns to delete are specified as comma-separated values:
446
+
447
+ ```text
448
+ csv2numbers file1.csv --delete=Account,3
449
+ ```
450
+
451
+ #### Renaming columns
452
+
453
+ Rename columns using `--rename`. The current column name and new column name are separated by a `:` and each renaming is specified as comma-separated values:
454
+
455
+ ```text
456
+ csv2numbers file1.csv --rename=2:Account,"Paid In":Amount
457
+ ```
458
+
459
+ #### Date columns
460
+
461
+ The `--date` option identifies a comma-separated list of columns that should be parsed as dates. Use `--day-first` where the day and month are ambiguous and the day comes first rather than the month.
462
+
463
+ #### Transforming columns
464
+
465
+ Columns can be merged and new columns created using simple functions. The `--transform` option takes a comma-separated list of transformations of the form `NEW=FUNC:OLD`. Supported functions are:
466
+
467
+ | Function | Arguments | Description |
468
+ |------------|-----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
469
+ | MERGE | dest=MERGE:source | The dest column is written with values from one or more columns<br/>indicated by source. For multiple columns, which are separated<br/>by ;, the first non-empty value is chosen. |
470
+ | NEG | dest=NEG:source | The dest column contains absolute values of any column that is<br/>negative. This is useful for isolating debits from account<br/>exports. |
471
+ | POS | dest=POS:source | The dest column contains values of any column that is<br/>positive. This is useful for isolating credits from account<br/>exports. |
472
+ | LOOKUP | dest=LOOKUP:source;filename | A lookup map is read from filename which must be an Apple<br/>Numbers file containing a single table of two columns. The table<br/>is used to match against source, searching the first column<br/>for matches and writing the corresponding value from the second<br/>column to dest. Values are chosen based on the longest<br/>matching substring. |
473
+
474
+ Examples:
475
+
476
+ ```text
477
+ csv2numbers --transform="Paid In"=POS:Amount,Withdrawn=NEG:Amount file1.csv
478
+ csv2numbers --transform='Category=LOOKUP:Transaction;mapping.numbers' file1.csv
479
+ ```
480
+
378
481
  ## Limitations
379
482
 
380
483
  Current known limitations of `numbers-parser` which may be implemented in the future are:
@@ -46,11 +46,11 @@ sudo apt-get -y install libsnappy-dev
46
46
  ```
47
47
 
48
48
  On Windows, you will need to either arrange for snappy to be found for VSC++ or you can install python
49
- binary libraries compiled by [Christoph Gohlke](https://www.lfd.uci.edu/~gohlke/pythonlibs/#python-snappy). You must select the correct python
50
- version for your installation. For example for python 3.11:
49
+ [pre-compiled binary libraries](https://github.com/cgohlke/win_arm64-wheels/) which are only available
50
+ for Windows on Arm. There appear to be no x86 pre-compiled packages for Windows.
51
51
 
52
52
  ```text
53
- pip install python_snappy-0.6.1-cp311-cp311-win_amd64.whl
53
+ pip install python_snappy-0.6.1-cp312-cp312-win_arm64.whl
54
54
  ```
55
55
 
56
56
  ## Quick Start
@@ -305,8 +305,15 @@ see the [full API docs](https://masaccio.github.io/numbers-parser/).
305
305
  ## Command-line scripts
306
306
 
307
307
  When installed from [PyPI](https://pypi.org/project/numbers-parser/),
308
- a command-like script `cat-numbers` is installed in Python’s scripts
309
- folder. This script dumps Numbers spreadsheets into Excel-compatible CSV
308
+ a number of command-line scripts are installed:
309
+
310
+ - `cat-numbers`: converts Numbers documents into CSV
311
+ - `csv2numbers`: converts CSV files to Numbers documents
312
+ - `unpack-numbers`: converts Numbers documents into JSON files for debug purposes
313
+
314
+ ### cat-numbers
315
+
316
+ This script dumps Numbers spreadsheets into Excel-compatible CSV
310
317
  format, iterating through all the spreadsheets passed on the
311
318
  command-line.
312
319
 
@@ -343,6 +350,102 @@ and `datetime.strftime`. Numbers in English locales displays 12-hour
343
350
  times with ‘am’ and ‘pm’, but `datetime.strftime` on macOS at least
344
351
  cannot return lower-case versions of AM/PM.
345
352
 
353
+ ### csv2numbers
354
+
355
+ This script converts Excel-compatible CSV files into Numbers documents. Output files
356
+ can optionally be provided, but if none are provided, the output is created by replacing
357
+ the input file’s suffix with .numbers. For example:
358
+
359
+ ```text
360
+ csv2numbers file1.csv file2.csv -o file1.numbers file2.numbers
361
+ ```
362
+
363
+ Columns of data can have a number of transformations applied to them. The primary use-
364
+ case intended for `csv2numbers` is converting banking exports to well-formatted
365
+ spreadsheets.
366
+
367
+ ```text
368
+ usage: csv2numbers [-h] [-V] [--whitespace] [--reverse] [--no-header]
369
+ [--day-first] [--date COLUMNS] [--rename COLUMNS-MAP]
370
+ [--transform COLUMNS-MAP] [--delete COLUMNS]
371
+ [-o [FILENAME ...]]
372
+ [csvfile ...]
373
+
374
+ positional arguments:
375
+ csvfile CSV file to convert
376
+
377
+ options:
378
+ -h, --help show this help message and exit
379
+ -V, --version
380
+ --whitespace strip whitespace from beginning and end of strings
381
+ and collapse other whitespace into single space
382
+ (default: false)
383
+ --reverse reverse the order of the data rows (default:
384
+ false)
385
+ --no-header CSV file has no header row (default: false)
386
+ --day-first dates are represented day first in the CSV file
387
+ (default: false)
388
+ --date COLUMNS comma-separated list of column names/indexes to
389
+ parse as dates
390
+ --rename COLUMNS-MAP comma-separated list of column names/indexes to
391
+ renamed as 'OLD:NEW'
392
+ --transform COLUMNS-MAP
393
+ comma-separated list of column names/indexes to
394
+ transform as 'NEW:FUNC=OLD'
395
+ --delete COLUMNS comma-separated list of column names/indexes to
396
+ delete
397
+ -o [FILENAME ...], --output [FILENAME ...]
398
+ output filename (default: use source file with
399
+ .numbers)
400
+ ```
401
+
402
+ The following options affect the output of the entire file. The default for each is always false.
403
+
404
+ - `--whitespace`: strip whitespace from beginning and end of strings and collapse other whitespace into single space
405
+ - `--reverse`: reverse the order of the data rows
406
+ - `--no-header`: CSV file has no header row
407
+ - `--day-first`: dates are represented day first in the CSV file
408
+
409
+ `csv2numbers` can also perform column manipulation. Columns can be identified using their name if the CSV file has a header or using a column index. Columns are zero-indexed and names and indices can be used together on the same command-line. When multiple columns are required, you can specify them using comma-separated values. The format for these arguments, like for the CSV file itself, is the Excel dialect.
410
+
411
+ #### Deleting columns
412
+
413
+ Delete columns using `--delete`. The names or indices of the columns to delete are specified as comma-separated values:
414
+
415
+ ```text
416
+ csv2numbers file1.csv --delete=Account,3
417
+ ```
418
+
419
+ #### Renaming columns
420
+
421
+ Rename columns using `--rename`. The current column name and new column name are separated by a `:` and each renaming is specified as comma-separated values:
422
+
423
+ ```text
424
+ csv2numbers file1.csv --rename=2:Account,"Paid In":Amount
425
+ ```
426
+
427
+ #### Date columns
428
+
429
+ The `--date` option identifies a comma-separated list of columns that should be parsed as dates. Use `--day-first` where the day and month are ambiguous and the day comes first rather than the month.
430
+
431
+ #### Transforming columns
432
+
433
+ Columns can be merged and new columns created using simple functions. The `--transform` option takes a comma-separated list of transformations of the form `NEW=FUNC:OLD`. Supported functions are:
434
+
435
+ | Function | Arguments | Description |
436
+ |------------|-----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
437
+ | MERGE | dest=MERGE:source | The dest column is written with values from one or more columns<br/>indicated by source. For multiple columns, which are separated<br/>by ;, the first non-empty value is chosen. |
438
+ | NEG | dest=NEG:source | The dest column contains absolute values of any column that is<br/>negative. This is useful for isolating debits from account<br/>exports. |
439
+ | POS | dest=POS:source | The dest column contains values of any column that is<br/>positive. This is useful for isolating credits from account<br/>exports. |
440
+ | LOOKUP | dest=LOOKUP:source;filename | A lookup map is read from filename which must be an Apple<br/>Numbers file containing a single table of two columns. The table<br/>is used to match against source, searching the first column<br/>for matches and writing the corresponding value from the second<br/>column to dest. Values are chosen based on the longest<br/>matching substring. |
441
+
442
+ Examples:
443
+
444
+ ```text
445
+ csv2numbers --transform="Paid In"=POS:Amount,Withdrawn=NEG:Amount file1.csv
446
+ csv2numbers --transform='Category=LOOKUP:Transaction;mapping.numbers' file1.csv
447
+ ```
448
+
346
449
  ## Limitations
347
450
 
348
451
  Current known limitations of `numbers-parser` which may be implemented in the future are:
@@ -12,11 +12,12 @@ name = "numbers-parser"
12
12
  packages = [{include = "numbers_parser", from = "src"}]
13
13
  readme = "README.md"
14
14
  repository = "https://github.com/masaccio/numbers-parser"
15
- version = "4.12.1"
15
+ version = "4.13.1"
16
16
 
17
17
  [tool.poetry.scripts]
18
18
  cat-numbers = "numbers_parser._cat_numbers:main"
19
19
  unpack-numbers = "numbers_parser._unpack_numbers:main"
20
+ csv2numbers = "numbers_parser._csv2numbers:main"
20
21
 
21
22
  [tool.poetry.dependencies]
22
23
  compact-json = "^1.1.3"
@@ -0,0 +1,416 @@
1
+ """Command-line utility to convert CSV files to Apple Numbers spreadsheets."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import csv
7
+ import re
8
+ from dataclasses import dataclass
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+ from sys import exit, stderr
12
+ from typing import NamedTuple, Tuple # noqa: F401
13
+
14
+ from dateutil.parser import parse
15
+
16
+ from numbers_parser import Document, NumbersError, _get_version
17
+
18
+
19
class ColumnTransform(NamedTuple):
    """Class for holding a column transformation rule.

    NOTE(review): within this module the class is only referenced in type
    annotations; the objects actually created for ``--transform`` are
    instances of the ``Transformer`` subclasses registered in ``TRANSFORMERS``.
    """

    # presumably the names/indexes of the columns read by the rule -- TODO confirm
    source: list[str]
    # presumably the name/index of the column written by the rule -- TODO confirm
    dest: str
    # NOTE(review): ``callable`` is the builtin predicate, not a typing construct;
    # it is only tolerated because annotations are evaluated lazily in this module.
    func: callable
25
+
26
+
27
@dataclass
class Converter:
    """Read a CSV file, convert its cells and write them to a Numbers document.

    Constructing an instance immediately reads ``input_filename`` and applies
    the per-cell conversions (whitespace clean-up, date parsing, numeric
    coercion). Column-level operations (``transform_columns``,
    ``rename_columns``, ``delete_columns``) can then be applied before calling
    ``save``.

    After construction ``header`` is the list of column names (or zero-based
    integer indexes when ``no_header`` is set) and ``data`` is a list of dicts
    mapping those names to cell values.
    """

    input_filename: str | None = None
    output_filename: str | None = None
    date_columns: list | None = None
    day_first: bool = False
    no_header: bool = False
    reverse: bool = False
    whitespace: bool = False

    def __post_init__(self: Converter) -> None:
        """Read the CSV file and apply the per-cell type conversions."""
        self._read_csv()
        self._transform_data()

    def _read_csv(self) -> None:
        """Read the CSV file into ``self.header`` and ``self.data``.

        Raises:
            RuntimeError: if the file does not exist or is not valid CSV.
        """
        lineno = 1
        try:
            # newline="" lets the csv module handle embedded newlines itself.
            with open(self.input_filename, newline="") as csvfile:
                # strict=True is passed as a format parameter rather than set on
                # csv.excel, which is a class shared by the whole process.
                csvreader = csv.reader(csvfile, dialect=csv.excel, strict=True)
                if self.no_header:
                    self.header = None
                else:
                    # An empty file yields an empty header instead of StopIteration.
                    self.header = next(csvreader, [])
                    lineno += 1
                self.data = []
                for row in csvreader:
                    self.data.append(row)
                    lineno += 1
        except FileNotFoundError as e:
            msg = f"{self.input_filename}: file not found"
            raise RuntimeError(msg) from e
        except csv.Error as e:
            msg = f"{self.input_filename}@{lineno}: {e.args[0]}"
            raise RuntimeError(msg) from e

    def _parse_date(self, x) -> datetime:
        """Parse a date string and return a datetime with its tzinfo set to UTC."""
        return parse(x, dayfirst=self.day_first).replace(tzinfo=timezone.utc)

    def _transform_data(self) -> None:
        """Apply type transformations to the data based on current configuration."""
        # Convert data rows to dicts. csv.DictReader is not enough as we support
        # CSV files with no header.
        if self.no_header:
            # Columns are addressed by zero-based index; an empty file has none.
            self.header = list(range(len(self.data[0]))) if self.data else []
        self.data = [dict(zip(self.header, row)) for row in self.data]

        if self.reverse:
            self.data.reverse()

        date_columns = set(self.date_columns or [])
        for row in self.data:
            # Only values are replaced, never keys, so iterating items() is safe.
            for key, value in row.items():
                if self.whitespace:
                    value = re.sub(r"\s+", " ", value.strip())
                    row[key] = value
                if key in date_columns:
                    row[key] = self._parse_date(value)
                else:
                    # Attempt to coerce the value into a float; leave it as a
                    # string when that is not possible.
                    try:
                        row[key] = float(value.replace(",", ""))
                    except ValueError:
                        pass

    def rename_columns(self: Converter, mapper: dict) -> None:
        """Rename columns using a map of ``{old_name_or_index: new_name}``.

        Does nothing when ``mapper`` is None. Renaming forces a header row to
        be written by ``save`` even if the CSV file had none.
        """
        if mapper is None:
            return
        self.no_header = False
        renamed = [mapper.get(x, x) for x in self.header]
        if len(set(renamed)) == len(renamed):
            # Keep the row keys in step with the header so that later column
            # operations can address columns by their new names. When the
            # rename produces duplicate titles the rows cannot be re-keyed
            # without losing cells, so they keep their original keys.
            self.data = [{mapper.get(k, k): v for k, v in row.items()} for row in self.data]
        self.header = renamed

    def delete_columns(self: Converter, columns: list) -> None:
        """Delete the named/indexed columns from the header and every row.

        Does nothing when ``columns`` is None.

        Raises:
            RuntimeError: if any of the columns is not present in the header.
        """
        if columns is None:
            return

        # A column listed twice must only be removed once.
        columns = list(dict.fromkeys(columns))
        missing_columns = [x for x in columns if x not in self.header]
        if missing_columns:
            msg = "'" + "', '".join([str(x) for x in missing_columns]) + "'"
            msg += ": cannot delete: column(s) do not exist in CSV"
            raise RuntimeError(msg) from None

        for col in columns:
            self.header.remove(col)
        for row in self.data:
            for col in columns:
                # Rows shorter than the header may not contain the column.
                row.pop(col, None)

    def transform_columns(self: Converter, columns: list[ColumnTransform]) -> None:
        """Apply each column transformation in order.

        Does nothing when ``columns`` is None. A destination column that is
        not yet in the header is appended to it.
        """
        if columns is None:
            return
        for transform in columns:
            transform.transform(self.header, self.data)
            if transform.dest not in self.header:
                self.header.append(transform.dest)

    def save(self: Converter) -> None:
        """Write the header (unless ``no_header`` is set) and data rows to ``output_filename``."""
        doc = Document(num_rows=2, num_cols=2)
        table = doc.sheets[0].tables[0]

        rows = [] if self.no_header else [self.header]
        rows.extend(row.values() for row in self.data)

        for row_num, row in enumerate(rows):
            for col_num, value in enumerate(row):
                table.write(row_num, col_num, value)
                if isinstance(value, datetime):
                    table.set_cell_formatting(
                        row_num, col_num, "datetime", date_time_format="d MMM yyyy"
                    )

        doc.save(self.output_filename)
148
+
149
+
150
class Transformer:
    """Base class for column transformations.

    ``source`` is a ``;``-separated list of column names or indexes and
    ``dest`` is the column written by the transformation. Values made up
    entirely of decimal digits are treated as integer column indexes.
    """

    def __init__(self: Transformer, source: str, dest: str) -> None:
        # str.isdecimal() matches exactly what int() can convert, whereas
        # str.isnumeric() also accepts characters such as "½" that int() rejects.
        self.dest = int(dest) if dest.isdecimal() else dest
        self.sources = [int(x) if x.isdecimal() else x for x in source.split(";")]

    def transform_row(self: Transformer, row: dict) -> None:
        """Transform a single row in place; must be implemented by subclasses."""
        raise NotImplementedError

    def transform(self: Transformer, header: list, data: list[dict]) -> None:
        """Apply ``transform_row`` to every row of ``data`` in place.

        Raises:
            RuntimeError: if any source column is not present in ``header``.
        """
        missing = [x for x in dict.fromkeys(self.sources) if x not in header]
        if missing:
            msg = "'" + "', '".join([str(x) for x in missing]) + "'"
            msg += ": transform failed: column(s) do not exist in CSV"
            raise RuntimeError(msg)
        for row in data:
            self.transform_row(row)
170
+
171
+
172
class MergeTransformer(Transformer):
    """Transformer for column MERGE operations."""

    def transform_row(self: MergeTransformer, row: dict) -> None:
        """Write the first non-empty source value of the row to the dest column.

        The dest column is set to an empty string when every source is empty.
        """
        value = ""
        for col in self.sources:
            # Rows shorter than the header may lack the column altogether.
            candidate = row.get(col, "")
            # Only missing values count as empty: a numeric 0.0 is real data.
            if candidate is not None and candidate != "":
                value = candidate
                break
        row[self.dest] = value
182
+
183
+
184
class NegTransformer(Transformer):
    """Transformer for column NEG operations."""

    def transform_row(self: NegTransformer, row: dict) -> None:
        """Write the absolute value of the first negative source value to dest.

        Empty and non-numeric cells are ignored; the dest column is set to an
        empty string when no source holds a negative number.
        """
        value = ""
        for col in self.sources:
            try:
                number = float(row.get(col, ""))
            except (TypeError, ValueError):
                # Not a number (text, empty cell, date): cannot be negative.
                continue
            if number < 0:
                value = abs(number)
                break
        row[self.dest] = value
194
+
195
+
196
class PosTransformer(Transformer):
    """Transformer for column POS operations."""

    def transform_row(self: PosTransformer, row: dict) -> None:
        """Write the first positive source value of the row to the dest column.

        Empty and non-numeric cells are ignored; the dest column is set to an
        empty string when no source holds a positive number.
        """
        value = ""
        for col in self.sources:
            try:
                number = float(row.get(col, ""))
            except (TypeError, ValueError):
                # Not a number (text, empty cell, date): cannot be positive.
                continue
            if number > 0:
                value = number
                break
        row[self.dest] = value
206
+
207
+
208
class LookupTransformer(Transformer):
    """Transformer for column LOOKUP operations.

    ``source`` must be of the form ``COLUMN;FILENAME`` where FILENAME is a
    Numbers document. The first table of its first sheet is read as a map from
    the first column (search text) to the second column (replacement value).
    """

    def __init__(self: LookupTransformer, source: str, dest: str) -> None:
        super().__init__(source, dest)

        if len(self.sources) != 2:
            msg = f"'{self.sources}' LOOKUP must have exactly 2 arguments"
            raise RuntimeError(msg) from None

        (source, map_filname) = self.sources
        self.sources = [source]
        # The base class turns all-digit strings into ints; a filename must
        # remain a string.
        map_filname = str(map_filname)

        if not Path(map_filname).exists():
            msg = f"{map_filname}: no such file or directory"
            raise RuntimeError(msg) from None

        try:
            doc = Document(map_filname)
            table = doc.sheets[0].tables[0]
            self.lookup_map = {}
            for row_num in range(table.num_rows):
                key = table.cell(row_num, 0).value
                if key is None:
                    # Rows with no search text cannot match anything.
                    continue
                # Keys are compared as text, so non-string cells are converted.
                self.lookup_map[str(key)] = table.cell(row_num, 1).value
        except NumbersError as e:
            msg = f"{map_filname}: {e!r}"
            raise RuntimeError(msg) from e

    def transform_row(self: LookupTransformer, row: dict) -> None:
        """Write the value of the longest case-insensitive matching key to dest.

        The dest column is set to an empty string when no key is a substring
        of the source value.
        """
        # The source cell may already have been coerced to a float.
        haystack = str(row.get(self.sources[0], "")).lower()
        best_value = None
        best_len = -1
        for key, value in self.lookup_map.items():
            # Strictly-greater keeps the first of several equally long matches.
            if key.lower() in haystack and len(key) > best_len:
                best_value, best_len = value, len(key)
        row[self.dest] = "" if best_value is None else best_value
247
+
248
+
249
# Maps the lower-cased function name used in a --transform argument to the
# Transformer subclass that implements it.
TRANSFORMERS = {
    "merge": MergeTransformer,
    "neg": NegTransformer,
    "pos": PosTransformer,
    "lookup": LookupTransformer,
}
255
+
256
+
257
def parse_columns(arg: str) -> list:
    """Parse a list of column names in Excel-compatible CSV format.

    Entries made up entirely of decimal digits are returned as integer column
    indexes; everything else is returned as a string.

    Raises:
        argparse.ArgumentTypeError: if ``arg`` is not a valid CSV row.
    """
    try:
        fields = next(csv.reader([arg], strict=True))
    except csv.Error as e:
        msg = f"'{arg}': can't parse argument"
        raise argparse.ArgumentTypeError(msg) from e
    # str.isdecimal() matches what int() accepts; str.isnumeric() also matches
    # characters such as "½" which int() rejects.
    return [int(x) if x.isdecimal() else x for x in fields]
264
+
265
+
266
def parse_column_renames(arg: str) -> dict:
    """Parse a list of 'OLD:NEW' column renames in Excel-compatible CSV format.

    Returns a dict mapping each old column name (or integer index, when OLD is
    made up entirely of decimal digits) to its new name.

    Raises:
        argparse.ArgumentTypeError: if ``arg`` is not a valid CSV row or an
            entry does not contain exactly one ``:``.
    """
    try:
        mappings = next(csv.reader([arg], strict=True))
    except csv.Error as e:
        msg = f"'{arg}': malformed CSV string"
        raise argparse.ArgumentTypeError(msg) from e

    mapper = {}
    for mapping in mappings:
        if mapping.count(":") != 1:
            msg = f"'{mapping}': column rename maps must be formatted 'OLD:NEW'"
            raise argparse.ArgumentTypeError(msg)
        (old, new) = mapping.split(":")
        # str.isdecimal() matches what int() accepts; str.isnumeric() does not.
        mapper[int(old) if old.isdecimal() else old] = new
    return mapper
282
+
283
+
284
def parse_column_transforms(arg: str) -> list[ColumnTransform]:
    """Parse a list of 'NEW=FUNC:OLD' column transforms in Excel-compatible CSV format.

    Returns one transformer object (an instance of the class registered for
    FUNC in ``TRANSFORMERS``) per entry, in the order given.

    Raises:
        argparse.ArgumentTypeError: if ``arg`` is not a valid CSV row, an entry
            is not of the form 'NEW=FUNC:OLD', FUNC is unknown, or the
            transformer rejects its arguments.
    """
    try:
        entries = next(csv.reader([arg], strict=True))
    except csv.Error as e:
        msg = f"'{arg}': malformed CSV string"
        raise argparse.ArgumentTypeError(msg) from e

    transforms = []
    for transform in entries:
        m = re.match(r"(.+)=(\w+):(.+)", transform)
        if not m:
            msg = f"'{transform}': invalid transformation format"
            raise argparse.ArgumentTypeError(msg)
        dest = m.group(1)
        func = m.group(2).lower()
        source = m.group(3)
        if func not in TRANSFORMERS:
            msg = f"'{m.group(2)}': invalid transformation"
            raise argparse.ArgumentTypeError(msg)
        try:
            transforms.append(TRANSFORMERS[func](source, dest))
        except RuntimeError as e:
            # argparse only reports ArgumentTypeError, TypeError and ValueError
            # cleanly; anything else would surface as a traceback.
            raise argparse.ArgumentTypeError(str(e)) from e
    return transforms
305
+
306
+
307
def command_line_parser() -> argparse.ArgumentParser:
    """Create and return the command-line argument parser for csv2numbers."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-V", "--version", action="store_true")
    parser.add_argument(
        "--whitespace",
        required=False,
        action="store_true",
        help="strip whitespace from beginning and end of strings and "
        "collapse other whitespace into single space (default: false)",
    )
    parser.add_argument(
        "--reverse",
        required=False,
        action="store_true",
        help="reverse the order of the data rows (default: false)",
    )
    parser.add_argument(
        "--no-header",
        required=False,
        action="store_true",
        help="CSV file has no header row (default: false)",
    )
    parser.add_argument(
        "--day-first",
        required=False,
        action="store_true",
        help="dates are represented day first in the CSV file (default: false)",
    )
    parser.add_argument(
        "--date",
        metavar="COLUMNS",
        type=parse_columns,
        help="comma-separated list of column names/indexes to parse as dates",
    )
    parser.add_argument(
        "--rename",
        metavar="COLUMNS-MAP",
        type=parse_column_renames,
        help="comma-separated list of column names/indexes to rename as 'OLD:NEW'",
    )
    parser.add_argument(
        "--transform",
        metavar="COLUMNS-MAP",
        type=parse_column_transforms,
        # The syntax accepted by parse_column_transforms is NEW=FUNC:OLD.
        help="comma-separated list of column names/indexes to transform as 'NEW=FUNC:OLD'",
    )
    parser.add_argument(
        "--delete",
        metavar="COLUMNS",
        type=parse_columns,
        help="comma-separated list of column names/indexes to delete",
    )
    parser.add_argument(
        "-o",
        "--output",
        nargs="*",
        metavar="FILENAME",
        help="output filename (default: use source file with .numbers)",
    )
    parser.add_argument("csvfile", nargs="*", help="CSV file to convert")
    return parser
369
+
370
+
371
def main() -> None:
    """Convert each CSV file named on the command-line to a Numbers document.

    Exits with status 0 after printing the version for ``--version`` and with
    status 1 when no CSV file is given, when the numbers of input and output
    filenames differ, or when a conversion fails.
    """
    parser = command_line_parser()
    args = parser.parse_args()

    if args.version:
        print(_get_version())
        exit(0)
    elif len(args.csvfile) == 0:
        print("At least one CSV file is required", file=stderr)
        parser.print_help(stderr)
        exit(1)

    if args.output is None:
        # Default: write each document next to its source with a new suffix.
        output_filenames = [Path(x).with_suffix(".numbers") for x in args.csvfile]
    else:
        output_filenames = args.output

    if len(args.csvfile) != len(output_filenames):
        print("The numbers of input and output file names do not match", file=stderr)
        exit(1)

    try:
        for input_filename, output_filename in zip(args.csvfile, output_filenames):
            converter = Converter(
                day_first=args.day_first,
                no_header=args.no_header,
                whitespace=args.whitespace,
                reverse=args.reverse,
                date_columns=args.date,
                input_filename=input_filename,
                output_filename=output_filename,
            )

            converter.transform_columns(args.transform)
            converter.rename_columns(args.rename)
            converter.delete_columns(args.delete)
            converter.save()
    except (RuntimeError, NumbersError) as e:
        # NumbersError is reported like any other conversion failure
        # rather than as a traceback.
        print(e, file=stderr)
        exit(1)
412
+
413
+
414
+ if __name__ == "__main__": # pragma: no cover
415
+ # execute only if run as a script
416
+ main()
@@ -1329,6 +1329,8 @@ class Cell(CellStorageFlags, Cacheable):
1329
1329
  is_currency: bool = False,
1330
1330
  ) -> None:
1331
1331
  self._is_currency = is_currency
1332
+ if is_currency:
1333
+ self._type = CellType.CURRENCY
1332
1334
  if format_type == FormattingType.CURRENCY:
1333
1335
  self._currency_format_id = format_id
1334
1336
  elif format_type == FormattingType.TICKBOX:
@@ -329,6 +329,10 @@ class _NumbersModel(Cacheable):
329
329
  if enabled is not None:
330
330
  table_info.super.caption_hidden = not enabled
331
331
  else:
332
+ caption_info_id = table_info.super.caption.identifier
333
+ caption_archive = self.objects[caption_info_id]
334
+ if caption_archive.DESCRIPTOR.name == "StandinCaptionArchive":
335
+ return False
332
336
  return not table_info.super.caption_hidden
333
337
 
334
338
  def find_style_id(self, style_substr: str):
@@ -482,6 +486,8 @@ class _NumbersModel(Cacheable):
482
486
  if caption is not None:
483
487
  clear_field_container(self.objects[caption_storage_id].text)
484
488
  self.objects[caption_storage_id].text.append(caption)
489
+ elif len(self.objects[caption_storage_id].text) == 0:
490
+ return "Caption"
485
491
  else:
486
492
  return self.objects[caption_storage_id].text[0]
487
493
 
@@ -1481,6 +1487,7 @@ class _NumbersModel(Cacheable):
1481
1487
  table_info_id, location="Document", component_id=self.calc_engine_id()
1482
1488
  )
1483
1489
  self.create_caption_archive(table_model_id)
1490
+ self.caption_enabled(table_model_id, False)
1484
1491
 
1485
1492
  self.add_formula_owner(
1486
1493
  table_info_id,