edjas 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edjas/__init__.py +35 -0
- edjas/find_first.py +49 -0
- edjas/read_params.py +74 -0
- edjas-0.5.1.dist-info/METADATA +71 -0
- edjas-0.5.1.dist-info/RECORD +8 -0
- edjas-0.5.1.dist-info/WHEEL +4 -0
- edjas-0.5.1.dist-info/entry_points.txt +2 -0
- edjas-0.5.1.dist-info/licenses/LICENSE +21 -0
edjas/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import json
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
6
|
+
|
|
7
|
+
from .read_params import read_file
|
|
8
|
+
|
|
9
|
+
# Resolve the package version from the installed distribution metadata; the
# value is exposed as ``edjas.__version__``.
try:
    __version__ = version("edjas")
except PackageNotFoundError:  # not installed (e.g. running from a source checkout)
    # No distribution metadata is available, so use a placeholder version.
    __version__ = "0.0.0+unknown"

# Names exported by ``from edjas import *``.
__all__ = ["read_file", "__version__"]
|
|
15
|
+
|
|
16
|
+
def _json_default(value):
    """Serialise values that the ``json`` module cannot encode natively.

    Date, time and datetime objects (which spreadsheet readers commonly
    return for date-formatted cells) expose ``isoformat()``, so they are
    written as ISO 8601 strings.

    Args:
        value: The object ``json`` could not encode.

    Returns:
        str: The result of ``value.isoformat()``.

    Raises:
        TypeError: If ``value`` has no callable ``isoformat`` attribute.
    """
    isoformat = getattr(value, "isoformat", None)
    if callable(isoformat):
        return isoformat()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def main(argv=None):
    """Command-line entry point: dump a spreadsheet's parameters as JSON.

    Parses the command line, reads the requested named range from the
    spreadsheet with ``read_file`` and writes the resulting data to standard
    output as a single JSON document followed by a newline.

    Args:
        argv (list[str] | None): Argument list to parse. ``None`` lets
            ``argparse`` fall back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        prog="edjas",
        description="Extract data in JSON from any spreadsheet.",
    )
    parser.add_argument("file", help="path to the spreadsheet to read")
    parser.add_argument(
        "-r",
        "--range",
        default="Parameters",
        help="named range to use as the starting point (default: Parameters)",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    args = parser.parse_args(argv)
    # Without a ``default`` hook a single date cell would abort the whole
    # export with "Object of type datetime is not JSON serializable".
    json.dump(
        read_file(args.file, args.range),
        sys.stdout,
        default=_json_default,
    )
    sys.stdout.write("\n")
|
edjas/find_first.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from openpyxl import Workbook
|
|
2
|
+
from openpyxl.cell import Cell
|
|
3
|
+
from openpyxl.worksheet.worksheet import Worksheet
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def column(sheet: Worksheet) -> Cell | None:
    """
    Return the first non-blank cell found scanning the sheet column by column.

    A cell is blank only when its value is ``None`` or an empty string, so
    cells holding ``0`` or ``False`` count as data.

    Args:
        sheet (openpyxl.worksheet.worksheet.Worksheet): The worksheet to search.

    Returns:
        openpyxl.cell.Cell: The first non-blank cell scanning column by column,
        or None if every cell is blank.
    """
    for col in sheet.iter_cols():
        for cell in col:
            # Compare against the blank values explicitly: a plain truthiness
            # test would wrongly skip legitimate values such as 0 and False.
            if cell is not None and cell.value not in (None, ""):
                return cell
    return None
|
|
21
|
+
|
|
22
|
+
def row(sheet: Worksheet) -> Cell | None:
    """
    Return the first non-blank cell found scanning the sheet row by row.

    A cell is blank only when its value is ``None`` or an empty string, so
    cells holding ``0`` or ``False`` count as data.

    Args:
        sheet (openpyxl.worksheet.worksheet.Worksheet): The worksheet to search.

    Returns:
        openpyxl.cell.Cell: The first non-blank cell scanning row by row,
        or None if every cell is blank.
    """
    # The loop variable is not called ``row`` so that it does not shadow this
    # function's own name.
    for cells in sheet.iter_rows():
        for cell in cells:
            # Compare against the blank values explicitly: a plain truthiness
            # test would wrongly skip legitimate values such as 0 and False.
            if cell is not None and cell.value not in (None, ""):
                return cell
    return None
|
|
37
|
+
|
|
38
|
+
def top_left(sheet: Worksheet) -> Cell:
    """
    Locate the cell in the upper-left corner of the sheet's used range.

    Args:
        sheet (openpyxl.worksheet.worksheet.Worksheet): The worksheet to search.

    Returns:
        openpyxl.cell.Cell: The first cell of the first row of the range
        reported by ``sheet.calculate_dimension()``.
    """
    used_cells = sheet[sheet.calculate_dimension()]
    first_row = used_cells[0]
    return first_row[0]
|
edjas/read_params.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
import openpyxl
|
|
4
|
+
|
|
5
|
+
def extract_values(sheet, range_spec, flatten=True):
    """Read the values of a cell range from a worksheet.

    Args:
        sheet: Worksheet-like object; ``sheet[range_spec]`` must yield either
            a sequence of rows of cells or, for a single-cell reference, one
            cell. Every cell exposes its content as ``.value``.
        range_spec (str): Cell reference understood by ``sheet``, such as
            ``"A1:B4"`` or ``"C3"``.
        flatten (bool): When true, a single row is returned as a flat list
            and a single column as a flat list of its values. When false the
            result is always a list of row lists.

    Returns:
        list: The cell values, shaped as described for ``flatten``.
    """
    cells = sheet[range_spec]
    if hasattr(cells, "value"):
        # A single-cell reference yields the cell itself rather than a grid;
        # wrap it so the rest of the function sees one row with one cell.
        cells = ((cells,),)
    result = [[c.value for c in row] for row in cells]
    # An empty result has no first row to inspect, so return it unchanged.
    if not flatten or not result:
        return result
    if len(result) == 1:
        return result[0]
    if len(result[0]) == 1:
        return [r[0] for r in result]
    return result
|
|
17
|
+
|
|
18
|
+
def range_values(wb, range_spec, flatten=True):
    """Return the values of a named range or explicit cell reference.

    Args:
        wb: Workbook-like object providing ``defined_names``, ``active`` and
            sheet lookup by title (``wb[title]``).
        range_spec (str): Either the name of a defined name in ``wb`` or a
            cell reference, optionally prefixed with ``SheetTitle!``.
        flatten (bool): Passed through to ``extract_values``.

    Returns:
        list: The values as returned by ``extract_values``.

    Raises:
        KeyError: If the referenced sheet title does not exist in ``wb``.
    """
    if range_spec in wb.defined_names:
        range_spec = wb.defined_names[range_spec].attr_text
    # Split on the LAST "!" so that a sheet title which itself contains "!"
    # is kept whole instead of breaking the unpacking.
    sheet_name, bang, cell_refs = range_spec.rpartition("!")
    if bang:
        # Titles containing spaces or punctuation are written quoted, e.g.
        # 'My Sheet'!$A$1:$B$4, with embedded apostrophes doubled. Strip the
        # quoting so the lookup uses the real sheet title.
        if len(sheet_name) >= 2 and sheet_name[0] == sheet_name[-1] == "'":
            sheet_name = sheet_name[1:-1].replace("''", "'")
        sheet = wb[sheet_name]
    else:
        # No sheet qualifier: fall back to the workbook's active sheet.
        sheet = wb.active
    return extract_values(sheet, cell_refs, flatten=flatten)
|
|
28
|
+
|
|
29
|
+
def range_to_dict(workbook, range_spec):
    """Convert a two-column range into a dictionary.

    The left-hand column supplies the keys and the right-hand column the
    values. String values get special treatment:

    - ``"{name}"`` is replaced by the dictionary built recursively from the
      range ``name``;
    - ``"[name]"`` is replaced by the list (or list of row lists) returned by
      ``range_values`` for the range ``name``.

    Rows in which both cells are empty are skipped.

    Args:
        workbook: Workbook object accepted by ``range_values``.
        range_spec (str): Defined name or cell reference of the range.

    Returns:
        dict: Mapping of each key cell to its (possibly expanded) value.

    Raises:
        ValueError: If the range is not exactly two columns wide, or if a row
            has a value but no key.
    """
    rows = range_values(workbook, range_spec, flatten=False)
    if not rows or len(rows[0]) != 2:
        raise ValueError(f"Range spec {range_spec} should have two columns")
    result = {}
    for key, value in rows:
        # Skip empty rows, but complain about floating values
        if key is None:
            if value is None:
                continue
            raise ValueError(
                f"Empty key not expected on value {value!r} - programming error?"
            )
        if isinstance(value, str):
            if value.startswith("{") and value.endswith("}"):
                # "{name}": recursively expand the referenced range to a dict.
                value = range_to_dict(workbook, value[1:-1])
            elif value.startswith("[") and value.endswith("]"):
                # "[name]": the referenced range becomes a list or matrix.
                value = range_values(workbook, value[1:-1])
        result[key] = value
    return result
|
|
61
|
+
|
|
62
|
+
def read_file(file_name, range_name="Parameters"):
    """Load a spreadsheet and convert one of its ranges into a dictionary.

    The workbook is opened with ``openpyxl.load_workbook`` using
    ``data_only=False`` and the range is converted by ``range_to_dict``.

    Args:
        file_name: Path of the spreadsheet to open.
        range_name (str): Range used as the starting point.

    Returns:
        dict: The result of ``range_to_dict`` for ``range_name``.
    """
    return range_to_dict(
        openpyxl.load_workbook(file_name, data_only=False),
        range_name,
    )
|
|
66
|
+
|
|
67
|
+
if __name__ == '__main__':
    # Ad-hoc script mode: pretty-print the extracted data for one spreadsheet,
    # optionally starting from a range name given as the second argument.
    from pprint import pprint

    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit("Requires spreadsheet arguments")
    if len(cli_args) > 2:
        sys.exit("Sorry, only handling one or two arguments for now")
    pprint(read_file(*cli_args))
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: edjas
|
|
3
|
+
Version: 0.5.1
|
|
4
|
+
Summary: Extract data in JSON from any spreadsheet
|
|
5
|
+
Author-email: Steve Holden <steve@holdenweb.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: data-extraction,excel,json,openpyxl,reporting,spreadsheet,xlsx
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Information Technology
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Topic :: Office/Business :: Financial :: Spreadsheet
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: openpyxl<4.0.0,>=3.1.5
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# EDJAS: Extract Data in JSON from Any Spreadsheet
|
|
25
|
+
|
|
26
|
+
- Sources at https://github.com/holdenweb/edjas
|
|
27
|
+
|
|
28
|
+
This project is an attempt to help organisations that insist on managing
|
|
29
|
+
their businesses, or major aspects thereof, using spreadsheets.
|
|
30
|
+
Many articles have been written on the limitations of spreadsheet technology.
|
|
31
|
+
If you have any doubts then look at the "The Problem with Spreadsheets"
|
|
32
|
+
section of this [LinkedIn
|
|
33
|
+
article](https://www.linkedin.com/pulse/spreadsheets-inadequate-effective-management--gjsse/).
|
|
34
|
+
Some large organisations are now
|
|
35
|
+
[providing advice](https://www.gov.uk/guidance/creating-and-sharing-spreadsheets)
|
|
36
|
+
— although in many cases better advice might be:
|
|
37
|
+
_stop using spreadsheets for that_!
|
|
38
|
+
|
|
39
|
+
Rather than try to change the way people do business (imagine "If I Ruled the
|
|
40
|
+
World" playing softly in the background), EDJAS is intended to help people extract
|
|
41
|
+
that locked-up data more effectively, in simple and easy-to-understand ways
|
|
42
|
+
that don't affect existing workflows.
|
|
43
|
+
|
|
44
|
+
It lets you add data specifications to any existing spreadsheet by creating
|
|
45
|
+
named ranges in the spreadsheet. By default EDJAS will look for a range
|
|
46
|
+
named `Parameters` as its starting point, although this can be overridden on the command line.
|
|
47
|
+
This range should be precisely two columns wide, and EDJAS
|
|
48
|
+
treats the left-hand column as names and the right-hand column as values.
|
|
49
|
+
Normally, the values are used literally after extraction from the spreadsheet.
|
|
50
|
+
Two formats for the value are given special treatment.
|
|
51
|
+
|
|
52
|
+
- `[range_name]`: the named range is exported as a JSON list or, if it's two-dimensional, a list of row lists.
|
|
53
|
+
- `{range_name}`: The named range, which must be two columns wide, becomes a JSON object where the left-hand column specifies
|
|
54
|
+
the names and the right-hand column specifies the values.
|
|
55
|
+
|
|
56
|
+
The parameter details are used to extract data from the spreadsheet, which is then sent to standard output as JSON.
|
|
57
|
+
|
|
58
|
+

|
|
59
|
+
|
|
60
|
+
In the example shown, the `version` key has a dict value, and in that dict the `number` key has a value of "1.0.2".
|
|
61
|
+
The version number can therefore be referenced in the JSON output as `version.number`. The output from this example is shown below.
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+

|
|
65
|
+
|
|
66
|
+
A demonstration of the system can be found at [https://github.com/holdenweb/edjas-demo](https://github.com/holdenweb/edjas-demo).
|
|
67
|
+
|
|
68
|
+
This is particularly useful for audiences that have an interest in only a
|
|
69
|
+
limited number of features from a possibly quite large spreadsheet.
|
|
70
|
+
More generally, JSON is such a widely used format that spreadsheet data can
|
|
71
|
+
be re-used in a wide range of systems as appropriate.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
edjas/__init__.py,sha256=HGHNJ0IH45E5WiQGJ-wfWN4JVRzWql_xSqgB4uf1u98,977
|
|
2
|
+
edjas/find_first.py,sha256=t7zJQUobjhqYkj3Ahl1XQeff38jHrHr1HTRqMzWFJFY,1525
|
|
3
|
+
edjas/read_params.py,sha256=4LP6JAbNt-Y1RRTun3Tr0Ex5EzaE4XrXSPvXNvxq69M,2660
|
|
4
|
+
edjas-0.5.1.dist-info/METADATA,sha256=mz51Sym-A-r7o5sMjMs469DGqbSL89Mibx77kB8Q4mw,3711
|
|
5
|
+
edjas-0.5.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
6
|
+
edjas-0.5.1.dist-info/entry_points.txt,sha256=J1HXWdLo-WZ0TLUgEk16uDxFUZ8kvvB6Q5z_1fYXomA,37
|
|
7
|
+
edjas-0.5.1.dist-info/licenses/LICENSE,sha256=YdiT5URXWh-yGtoqz8kfS199rVB7_lBvivJfuRyOfTc,1074
|
|
8
|
+
edjas-0.5.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025-2026 Steve Holden
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|