edjas 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edjas/__init__.py ADDED
@@ -0,0 +1,35 @@
1
+ import argparse
2
+ import json
3
+ import sys
4
+
5
+ from importlib.metadata import PackageNotFoundError, version
6
+
7
+ from .read_params import read_file
8
+
9
# Names exported by ``from edjas import *``.
__all__ = ["read_file", "__version__"]

# Take the version from the installed distribution's metadata; when the
# package is not installed (e.g. running from a source checkout) fall back
# to a placeholder version string.
try:
    __version__ = version("edjas")
except PackageNotFoundError:
    __version__ = "0.0.0+unknown"
15
+
16
def main(argv=None):
    """
    Command-line entry point: print a spreadsheet's parameters as JSON.

    Parses the command line, reads the requested range from the spreadsheet
    with ``read_file`` and writes the resulting structure to standard output
    as one JSON document followed by a newline.

    Args:
        argv (list[str] | None): Argument strings to parse. ``None`` lets
            argparse fall back to the process command line.

    Raises:
        SystemExit: Raised by argparse for ``--version``, ``--help`` or
            invalid arguments.
        TypeError: If the extracted data contains a value that is neither
            JSON-native nor a date/time object.
    """
    import datetime

    def _json_default(value):
        # openpyxl hands back date/time cells as datetime objects, which the
        # json module rejects; emit them in ISO 8601 form instead of crashing.
        if isinstance(value, (datetime.date, datetime.time)):
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    parser = argparse.ArgumentParser(
        prog="edjas",
        description="Extract data in JSON from any spreadsheet.",
    )
    parser.add_argument("file", help="path to the spreadsheet to read")
    parser.add_argument(
        "-r",
        "--range",
        default="Parameters",
        help="named range to use as the starting point (default: Parameters)",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    args = parser.parse_args(argv)

    # Serialise completely before writing so that a failure part-way through
    # cannot leave a truncated JSON document on standard output.
    payload = json.dumps(read_file(args.file, args.range), default=_json_default)
    sys.stdout.write(f"{payload}\n")
edjas/find_first.py ADDED
@@ -0,0 +1,49 @@
1
+ from openpyxl import Workbook
2
+ from openpyxl.cell import Cell
3
+ from openpyxl.worksheet.worksheet import Worksheet
4
+
5
+
6
def column(sheet: Worksheet) -> Cell | None:
    """
    Return the first non-blank cell found scanning the sheet column by column.

    A cell counts as blank when its value is ``None`` or the empty string.
    Falsy but meaningful values such as ``0`` or ``False`` are content.

    Args:
        sheet (openpyxl.worksheet.worksheet.Worksheet): The worksheet to search.

    Returns:
        openpyxl.cell.Cell: The first non-blank cell scanning column by column,
        or None if every cell is blank.
    """
    for cells in sheet.iter_cols():
        for cell in cells:
            # Compare against the blank markers explicitly: a plain truthiness
            # test would wrongly skip cells that contain 0 or False.
            if cell is not None and cell.value not in (None, ""):
                return cell
    return None
21
+
22
def row(sheet: Worksheet) -> Cell | None:
    """
    Return the first non-blank cell found scanning the sheet row by row.

    A cell counts as blank when its value is ``None`` or the empty string.
    Falsy but meaningful values such as ``0`` or ``False`` are content.

    Args:
        sheet (openpyxl.worksheet.worksheet.Worksheet): The worksheet to search.

    Returns:
        openpyxl.cell.Cell: The first non-blank cell scanning row by row,
        or None if every cell is blank.
    """
    # The loop variable is deliberately not called ``row`` so that it does not
    # shadow this function's own name.
    for cells in sheet.iter_rows():
        for cell in cells:
            # Compare against the blank markers explicitly: a plain truthiness
            # test would wrongly skip cells that contain 0 or False.
            if cell is not None and cell.value not in (None, ""):
                return cell
    return None
37
+
38
def top_left(sheet: Worksheet) -> Cell:
    """
    Locate the cell in the upper-left corner of the sheet's used data range.

    Args:
        sheet (openpyxl.worksheet.worksheet.Worksheet): The worksheet to search.

    Returns:
        openpyxl.cell.Cell: The first cell of the first row of the range
        reported by the sheet's ``calculate_dimension()``.
    """
    # Index the sheet with its own dimension reference, then take the first
    # cell of the first row of the resulting grid.
    used_cells = sheet[sheet.calculate_dimension()]
    first_row = used_cells[0]
    return first_row[0]
edjas/read_params.py ADDED
@@ -0,0 +1,74 @@
1
+ import sys
2
+
3
+ import openpyxl
4
+
5
def extract_values(sheet, range_spec, flatten=True):
    """
    Read the values of the cells addressed by ``range_spec`` on ``sheet``.

    Args:
        sheet: Worksheet-like object. Indexing it with ``range_spec`` must
            give either a sequence of rows of cells or a single cell.
        range_spec (str): Cell reference such as ``"A1:B4"`` or ``"C3"``.
        flatten (bool): When true, a single row or a single column collapses
            to a flat list. When false, the result is always a list of rows.

    Returns:
        list: A list of row lists, or, with ``flatten`` set and only one row
        or one column selected, a flat list of values.
    """
    selection = sheet[range_spec]
    # A single-cell reference yields a bare cell rather than a grid of rows.
    # Wrap it so it is handled as a 1x1 range instead of raising TypeError
    # when the rows are iterated.
    if hasattr(selection, "value"):
        selection = ((selection,),)

    rows = [[cell.value for cell in cells] for cells in selection]
    if not flatten:
        return rows
    if len(rows) == 1:
        # One row: return it as a flat list.
        return rows[0]
    if len(rows[0]) == 1:
        # One column: return its values as a flat list.
        return [values[0] for values in rows]
    return rows
17
+
18
def range_values(wb, range_spec, flatten=True):
    """
    Return the cell values for a named range or an explicit cell reference.

    Args:
        wb: Workbook to read from.
        range_spec (str): Either the name of a defined name in ``wb`` or a
            cell reference, optionally prefixed with ``SheetTitle!``. Without
            a sheet prefix the workbook's active sheet is used.
        flatten (bool): Passed through to ``extract_values``.

    Returns:
        list: The values as produced by ``extract_values``.

    Raises:
        KeyError: If the sheet named in the reference does not exist.
    """
    if range_spec in wb.defined_names:
        # Replace the name by the reference text it stands for.
        range_spec = wb.defined_names[range_spec].attr_text

    # Split on the LAST "!" so that a sheet title which itself contains "!"
    # does not break the sheet/cells separation.
    sheet_name, separator, cell_refs = range_spec.rpartition("!")
    if not separator:
        return extract_values(wb.active, range_spec, flatten=flatten)

    # Titles with spaces or punctuation are written quoted ('My Sheet') with
    # embedded quotes doubled; undo that to recover the real sheet title.
    if len(sheet_name) >= 2 and sheet_name[0] == "'" and sheet_name[-1] == "'":
        sheet_name = sheet_name[1:-1].replace("''", "'")
    return extract_values(wb[sheet_name], cell_refs, flatten=flatten)
28
+
29
def range_to_dict(workbook, range_spec):
    """
    Convert a two-column range into a dictionary, following references.

    The left-hand column supplies the keys and the right-hand column the
    values. Two string forms in the value column get special treatment:

    - ``"{name}"``: the referenced two-column range is converted recursively
      and stored as a nested dictionary.
    - ``"[name]"``: the referenced range is stored as a list, or as a list of
      row lists when it has several rows and columns.

    Any other value is stored unchanged. Rows whose key and value are both
    empty are skipped.

    Args:
        workbook: Workbook to read from.
        range_spec (str): Named range or cell reference to convert.

    Returns:
        dict: Mapping of keys to extracted values.

    Raises:
        ValueError: If the range is not exactly two columns wide, or if a row
            has a value but no key.
    """
    rows = range_values(workbook, range_spec, flatten=False)
    if len(rows[0]) != 2:
        raise ValueError(f"Range spec {range_spec} should have two columns")

    result = {}
    for key, value in rows:
        if key is None:
            # Skip empty rows, but complain about floating values.
            if value is None:
                continue
            raise ValueError(
                f"Empty key not expected on value {value!r} - programming error?"
            )
        if isinstance(value, str):
            if value.startswith("{") and value.endswith("}"):
                # "{SubParameters}" -> nested dictionary built from that range.
                value = range_to_dict(workbook, value[1:-1])
            elif value.startswith("[") and value.endswith("]"):
                # "[range]" -> list or matrix of the range's values.
                value = range_values(workbook, value[1:-1])
        result[key] = value
    return result
61
+
62
def read_file(file_name, range_name="Parameters"):
    """
    Load a spreadsheet file and convert one of its ranges into a dictionary.

    Args:
        file_name: Path of the workbook to open with openpyxl.
        range_name (str): Range to start from (default ``"Parameters"``).

    Returns:
        dict: The result of ``range_to_dict`` for the requested range.
    """
    # NOTE(review): data_only=False presumably means formula cells come back
    # as formula text rather than cached results - confirm this is intended.
    return range_to_dict(
        openpyxl.load_workbook(file_name, data_only=False),
        range_name,
    )
66
+
67
if __name__ == '__main__':
    # Ad-hoc script mode: pretty-print the extracted parameters.
    # Usage: read_params.py SPREADSHEET [RANGE_NAME]
    from pprint import pprint

    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit("Requires spreadsheet arguments")
    if len(cli_args) > 2:
        sys.exit("Sorry, only handling one or two arguments for now")
    pprint(read_file(*cli_args))
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.4
2
+ Name: edjas
3
+ Version: 0.5.1
4
+ Summary: Extract data in JSON from any spreadsheet
5
+ Author-email: Steve Holden <steve@holdenweb.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: data-extraction,excel,json,openpyxl,reporting,spreadsheet,xlsx
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Information Technology
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: Topic :: Office/Business :: Financial :: Spreadsheet
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
22
+ Description-Content-Type: text/markdown
23
+
24
+ # EDJAS: Extract Data in JSON from Any Spreadsheet
25
+
26
+ - Sources at https://github.com/holdenweb/edjas
27
+
28
+ This project is an attempt to help organisations that insist on managing
29
+ their businesses, or major aspects thereof, using spreadsheets.
30
+ Many articles have been written on the limitations of spreadsheet technology.
31
+ If you have any doubts then look at the "The Problem with Spreadsheets"
32
+ section of this [LinkedIn
33
+ article](https://www.linkedin.com/pulse/spreadsheets-inadequate-effective-management--gjsse/).
34
+ Some large organisations are now
35
+ [providing advice](https://www.gov.uk/guidance/creating-and-sharing-spreadsheets)
36
+ — although in many cases better advice might be:
37
+ _stop using spreadsheets for that_!
38
+
39
+ Rather than try to change the way people do business (imagine "If I Ruled the
40
+ World" playing softly in the background), EDJAS is intended to help people extract
41
+ that locked-up data more effectively, in simple and easy-to-understand ways
42
+ that don't affect existing workflows.
43
+
44
+ It lets you add data specifications to any existing spreadsheet by creating
45
+ named ranges in the spreadsheet. By default EDJAS will look for a range
46
+ named `Parameters` as its starting point, although this can be overridden on the command line with `-r`/`--range`.
47
+ This range should be precisely two columns wide, and EDJAS
48
+ treats the left-hand column as names and the right-hand column as values.
49
+ Normally, the values are used literally after extraction from the spreadsheet.
50
+ Two formats for the value are given special treatment.
51
+
52
+ - `[range_name]`: the named range is exported as a JSON list or, if it is two-dimensional, as a list of row lists.
53
+ - `{range_name}`: The named range, which must be two columns wide, becomes a JSON object where the left-hand column specifies
54
+ the names and the right-hand column specifies the values.
55
+
56
+ The parameter details are used to extract data from the spreadsheet, which is then sent to standard output as JSON.
57
+
58
+ ![Parameter specifications in EDJAS](images/parameters.png "Parameter specifications in EDJAS")
59
+
60
+ In the example shown, the `version` key has a dict value, and in that dict the `number` key has a value of "1.0.2".
61
+ The version number can therefore be referenced in the JSON output as `version.number`. The output from this example is shown below.
62
+
63
+
64
+ ![Parameter data extracted from a spreadsheet](images/json.png "The parameter data")
65
+
66
+ A demonstration of the system can be found at [https://github.com/holdenweb/edjas-demo](https://github.com/holdenweb/edjas-demo).
67
+
68
+ This is particularly useful for audiences that have an interest in only a
69
+ limited number of features from a possibly quite large spreadsheet.
70
+ More generally, JSON is such a widely used format that spreadsheet data can
71
+ be re-used in a wide range of systems as appropriate.
@@ -0,0 +1,8 @@
1
+ edjas/__init__.py,sha256=HGHNJ0IH45E5WiQGJ-wfWN4JVRzWql_xSqgB4uf1u98,977
2
+ edjas/find_first.py,sha256=t7zJQUobjhqYkj3Ahl1XQeff38jHrHr1HTRqMzWFJFY,1525
3
+ edjas/read_params.py,sha256=4LP6JAbNt-Y1RRTun3Tr0Ex5EzaE4XrXSPvXNvxq69M,2660
4
+ edjas-0.5.1.dist-info/METADATA,sha256=mz51Sym-A-r7o5sMjMs469DGqbSL89Mibx77kB8Q4mw,3711
5
+ edjas-0.5.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
6
+ edjas-0.5.1.dist-info/entry_points.txt,sha256=J1HXWdLo-WZ0TLUgEk16uDxFUZ8kvvB6Q5z_1fYXomA,37
7
+ edjas-0.5.1.dist-info/licenses/LICENSE,sha256=YdiT5URXWh-yGtoqz8kfS199rVB7_lBvivJfuRyOfTc,1074
8
+ edjas-0.5.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ edjas = edjas:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 Steve Holden
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.