PyPI - ebia - Versions diffs - 0.1.4__py3-none-any.whl - Mend

ebia 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

ebia/__init__.py +4 -0
ebia/cli.py +125 -0
ebia/parser.py +115 -0
ebia/xls_generator.py +282 -0
ebia-0.1.4.dist-info/METADATA +147 -0
ebia-0.1.4.dist-info/RECORD +9 -0
ebia-0.1.4.dist-info/WHEEL +5 -0
ebia-0.1.4.dist-info/entry_points.txt +2 -0
ebia-0.1.4.dist-info/top_level.txt +1 -0

ebia/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .parser import extract_invoice_fields
+# Public API of the package: only the invoice parser entry point is re-exported.
+__all__ = ["extract_invoice_fields"]
+# Must match the "Version" field of the distribution metadata for this release.
+__version__ = "0.1.4"

ebia/cli.py ADDED Viewed

@@ -0,0 +1,125 @@
+from __future__ import annotations
+import argparse
+from pathlib import Path
+from .parser import extract_invoice_fields
+from .xls_generator import (
+    generate_xlsx_by_month_day,
+    generate_xlsx_single,
+)
+def main(argv=None) -> int:
+    """Run the ``ebia`` command-line interface.
+    Single-PDF mode (``--path`` is a file) parses one invoice and writes one
+    workbook. Folder mode (``--path`` is a directory) parses every PDF found and
+    writes one ``YYYY-MM.xlsx`` workbook per month into the output directory.
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
+    Returns:
+        ``0`` on success.
+    Raises:
+        SystemExit: If ``--path`` is neither a file nor a directory, or if the
+            folder contains no PDF file. argparse also exits on invalid arguments.
+    """
+    ap = argparse.ArgumentParser(prog="ebia")
+    ap.add_argument("--path", required=True, help="PDF file path OR a folder containing PDFs")
+    ap.add_argument(
+        "--out",
+        default=None,
+        help=(
+            "Single-PDF mode: output .xlsx file path (default: output.xlsx). "
+            "Folder mode: output directory where one YYYY-MM.xlsx is created per month "
+            "(default: ./reports)."
+        ),
+    )
+    ap.add_argument("--tva", type=float, default=0.20, help="TVA rate (default 0.20)")
+    # Numbering used in folder mode (and as the fallback in single-PDF mode)
+    ap.add_argument("--start-piece", type=int, default=1, help="Starting Pièce number (default 1)")
+    ap.add_argument(
+        "--start-document", type=int, default=1, help="Starting Document number (default 1)"
+    )
+    ap.add_argument("--piece-width", type=int, default=4, help="Pièce zero-fill width (default 4)")
+    ap.add_argument(
+        "--document-width", type=int, default=5, help="Document zero-fill width (default 5)"
+    )
+    # Optional explicit values for a single PDF
+    ap.add_argument("--piece", default=None, help="Override Pièce for single PDF (e.g. 0000)")
+    ap.add_argument(
+        "--document", default=None, help="Override Document for single PDF (e.g. 00000)"
+    )
+    ap.add_argument("--no-headers", action="store_true", help="Generate Excel without header row")
+    ap.add_argument(
+        "--recursive", action="store_true", help="Scan subfolders for PDFs (folder mode)"
+    )
+    args = ap.parse_args(argv)
+    p = Path(args.path)
+    include_headers = not args.no_headers
+    if p.is_file():
+        # Single PDF mode
+        data = extract_invoice_fields(str(p))
+        out_file = args.out if args.out is not None else "output.xlsx"
+        # Without an explicit override, fall back to the zero-filled start numbers
+        if args.piece is None:
+            piece = str(args.start_piece).zfill(args.piece_width)
+        else:
+            piece = args.piece
+        if args.document is None:
+            document = str(args.start_document).zfill(args.document_width)
+        else:
+            document = args.document
+        generate_xlsx_single(
+            data,
+            out_file,
+            piece=piece,
+            document=document,
+            tva_rate=args.tva,
+            include_headers=include_headers,
+        )
+        print(f"OK (single) -> {out_file}")
+        return 0
+    if p.is_dir():
+        # Folder mode
+        # Compare the extension case-insensitively, once per file. Globbing "*.pdf"
+        # and "*.PDF" separately lists every file twice where pattern matching is
+        # case-insensitive (e.g. Windows) and misses mixed-case names such as "a.Pdf".
+        candidates = p.rglob("*") if args.recursive else p.iterdir()
+        pdfs = sorted(f for f in candidates if f.is_file() and f.suffix.lower() == ".pdf")
+        if not pdfs:
+            raise SystemExit(f"No PDF files found in folder: {p}")
+        invoice_dicts = []
+        skipped = 0
+        for pdf in pdfs:
+            # Best effort: one unreadable PDF must not abort the whole batch
+            try:
+                invoice_dicts.append(extract_invoice_fields(str(pdf)))
+            except Exception as e:
+                skipped += 1
+                print(f"[SKIP] {pdf}: {e}")
+        out_dir = args.out if args.out is not None else "./reports"
+        generated = generate_xlsx_by_month_day(
+            invoice_dicts,
+            out_dir,
+            start_piece=args.start_piece,
+            start_document=args.start_document,
+            piece_width=args.piece_width,
+            document_width=args.document_width,
+            tva_rate=args.tva,
+            include_headers=include_headers,
+        )
+        print(f"OK (folder) | parsed={len(invoice_dicts)} skipped={skipped}")
+        for f in generated:
+            print(f"  -> {f}")
+        return 0
+    raise SystemExit(f"Invalid path: {p}")
+if __name__ == "__main__":
+    raise SystemExit(main())

ebia/parser.py ADDED Viewed

@@ -0,0 +1,115 @@
+import re
+from datetime import datetime
+import pdfplumber
+# French month names mapped to their month number (1-12).
+# Accented and unaccented spellings are both listed; parse_french_date strips
+# accents before its lookup, so it only ever uses the unaccented keys.
+MONTHS_FR = {
+    "janvier": 1,
+    "février": 2,
+    "fevrier": 2,
+    "mars": 3,
+    "avril": 4,
+    "mai": 5,
+    "juin": 6,
+    "juillet": 7,
+    "août": 8,
+    "aout": 8,
+    "septembre": 9,
+    "octobre": 10,
+    "novembre": 11,
+    "décembre": 12,
+    "decembre": 12,
+}
+# Only the high-level extraction function is part of the module's public API.
+__all__ = ["extract_invoice_fields"]
+def normalize(value: str) -> float:
+    """Convert a French-formatted amount such as "1 234,56" to a float.
+    No-break and narrow no-break spaces are treated like regular spaces, newlines
+    and spaces are removed, and the decimal comma becomes a decimal point.
+    Raises:
+        ValueError: If the cleaned-up text is not a valid float literal.
+    """
+    substitutions = (
+        ("\u00a0", " "),  # NBSP -> space
+        ("\u202f", " "),  # narrow no-break space -> space
+        ("\n", ""),  # PDF line-wrap artefact
+        (" ", ""),  # thousand-separator spaces
+        (",", "."),  # French decimal comma -> point
+    )
+    for old, new in substitutions:
+        value = value.replace(old, new)
+    return float(value)
+def parse_french_date(date_str: str) -> str | None:
+    """Parse a French long-form date such as "12 février 2024" into ISO format.
+    The text is lower-cased and accents are stripped before the month lookup.
+    The ordinal form "1er" is accepted for the day.
+    Args:
+        date_str: Text containing a "<day> <month name> <4-digit year>" date.
+    Returns:
+        The date as "YYYY-MM-DD", or None when no date is found, the month name
+        is unknown, or the date does not exist (e.g. "31 février 2024").
+    """
+    # One pass replaces every accented letter handled by this parser
+    s = date_str.lower().translate(str.maketrans("éèêàâîïôûùç", "eeeaaiiouuc"))
+    m = re.search(r"(\d{1,2})(?:er)?\s+([a-z]+)\s+(\d{4})", s)
+    if not m:
+        return None
+    day, month_name, year = int(m.group(1)), m.group(2), int(m.group(3))
+    month = MONTHS_FR.get(month_name)
+    if not month:
+        return None
+    try:
+        return datetime(year, month, day).strftime("%Y-%m-%d")
+    except ValueError:
+        # Impossible calendar date: report "no date" instead of crashing the caller
+        return None
+def extract_total_ttc(text: str) -> float | None:
+    """Return the amount read as the TTC total from the invoice text.
+    Looks for a "Total € HT ... Total € TTC" header and takes the third
+    French-formatted amount that follows it.
+    NOTE(review): presumably the first two amounts are the HT and VAT totals;
+    confirm against the invoice layout.
+    Returns:
+        The amount as a float, or None when the header is missing or fewer than
+        three amounts follow it.
+    """
+    header = re.search(
+        r"Total\s*€\s*HT.*?Total\s*€\s*TTC(.*)", text, flags=re.IGNORECASE | re.DOTALL
+    )
+    if header is None:
+        return None
+    found = re.findall(r"(\d{1,3}(?:[\s\u00A0\u202F]\d{3})*,\d{2})", header.group(1))
+    return normalize(found[2]) if len(found) >= 3 else None
+def extract_invoice_fields(pdf_path: str) -> dict:
+    """Extract the client reference, invoice date and TTC total from a PDF invoice.
+    Args:
+        pdf_path: Path of the PDF file to read.
+    Returns:
+        A dict with three keys:
+        "Client": text following "Référence :" on its line, with a trailing
+        "le <day> <month> <year>" removed, or None when no reference is found;
+        "date": first "le <day> <month> <year>" date of the document as
+        "YYYY-MM-DD", or None;
+        "total_ttc": value returned by extract_total_ttc, or None.
+    """
+    with pdfplumber.open(pdf_path) as pdf:
+        # extract_text() may return None for a page without text
+        full_text = "\n".join((p.extract_text() or "") for p in pdf.pages)
+    ref_match = re.search(r"Référence\s*:\s*([^\r\n]+)", full_text, flags=re.IGNORECASE)
+    reference_line = ref_match.group(1).strip() if ref_match else None
+    reference = None
+    if reference_line:
+        # Case-insensitive like the date search below, so "Le 12 mars 2024" is
+        # trimmed from the reference as well; "1er" is accepted as a day.
+        reference = re.split(
+            r"\s+\ble\s+\d{1,2}(?:er)?\s+[A-Za-zÀ-ÿ]+\s+\d{4}\b",
+            reference_line,
+            maxsplit=1,
+            flags=re.IGNORECASE,
+        )[0].strip()
+    date_match = re.search(
+        r"\ble\s+(\d{1,2}(?:er)?\s+[A-Za-zÀ-ÿ]+\s+\d{4})", full_text, flags=re.IGNORECASE
+    )
+    date_iso = parse_french_date(date_match.group(1)) if date_match else None
+    total_ttc = extract_total_ttc(full_text)
+    return {"Client": reference, "date": date_iso, "total_ttc": total_ttc}
+def main(argv=None) -> int:  # pragma: no cover
+    """Parse the PDF invoice named on the command line and print its fields.
+    Args:
+        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
+    Returns:
+        Always ``0``.
+    """
+    import argparse
+    cli = argparse.ArgumentParser(prog="equipebaie-parse")
+    cli.add_argument("pdf", help="Chemin vers la facture PDF")
+    pdf_path = cli.parse_args(argv).pdf
+    fields = extract_invoice_fields(pdf_path)
+    print(fields)
+    return 0
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())

ebia/xls_generator.py ADDED Viewed

@@ -0,0 +1,282 @@
+from __future__ import annotations
+from collections import defaultdict
+from dataclasses import dataclass
+from datetime import date, datetime, time
+from pathlib import Path
+from typing import Any
+from openpyxl import Workbook
+from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
+from openpyxl.utils import get_column_letter
+# Column headers, in worksheet order (11 columns, A..K).
+# The rows built by invoice_to_rows follow the same column order.
+HEADERS = [
+    "Statut",
+    "Jour",
+    "Pièce",
+    "Document",
+    "Compte général",
+    "Compte auxiliaire",
+    "Libellé",
+    "Débit",
+    "Crédit",
+    "Date de l'échéance",
+    "Documents associés",
+]
+@dataclass
+class Invoice:
+    """Validated invoice data used to build the accounting rows."""
+    client: str  # written in the "Libellé" column of every generated row
+    date_iso: str  # "YYYY-MM-DD"
+    total_ttc: float  # tax-inclusive total; HT and TVA are derived from it
+def _parse_iso_date(s: str) -> date:
+    """Convert a "YYYY-MM-DD" string to a ``date``.
+    Raises:
+        ValueError: If *s* does not match the "%Y-%m-%d" format.
+    """
+    parsed = datetime.strptime(s, "%Y-%m-%d")
+    return parsed.date()
+def _money(x: float) -> float:
+    """Round an amount to 2 decimals after adding a tiny positive nudge.
+    The 1e-12 offset pushes values that sit just below a half-cent because of
+    binary float representation (e.g. 2.675) up to the next cent.
+    """
+    nudged = x + 1e-12
+    return round(nudged, 2)
+def _zfill_int(n: int, width: int) -> str:
+    """Return *n* as text, left-padded with zeros to at least *width* characters."""
+    digits = f"{n}"
+    return digits.zfill(width)
+def invoice_to_rows(
+    inv: Invoice,
+    *,
+    piece: str,
+    document: str,
+    statut: str = "",
+    tva_rate: float = 0.20,
+) -> list[list[Any]]:
+    """Build the three worksheet rows that record one invoice.
+    The TTC total is rounded first; HT is TTC / (1 + tva_rate) rounded, and TVA is
+    the rounded difference, so HT + TVA adds back up to TTC.
+    Args:
+        inv: Invoice to convert.
+        piece: Value of the "Pièce" column, shared by the three rows.
+        document: Value of the "Document" column, shared by the three rows.
+        statut: Value of the "Statut" column.
+        tva_rate: VAT rate used to split the TTC total.
+    Returns:
+        Three 11-cell rows in HEADERS order: account "411" with TTC in "Débit" and
+        the invoice date as due date, account "44571" with TVA in "Crédit", and
+        account "701" with HT in "Crédit".
+    """
+    invoice_day = _parse_iso_date(inv.date_iso)
+    day_stamp = datetime.combine(invoice_day, time.min)  # midnight of the invoice day
+    total_ttc = _money(inv.total_ttc)
+    total_ht = _money(total_ttc / (1 + tva_rate))
+    total_tva = _money(total_ttc - total_ht)
+    def _row(account: str, debit: Any, credit: Any, due: Any) -> list[Any]:
+        # Columns: Statut, Jour, Pièce, Document, general account, auxiliary account,
+        # Libellé, Débit, Crédit, due date, associated documents
+        return [statut, day_stamp, piece, document, account, "", inv.client, debit, credit, due, ""]
+    return [
+        _row("411", total_ttc, "", invoice_day),
+        _row("44571", "", total_tva, ""),
+        _row("701", "", total_ht, ""),
+    ]
+def style_as_table(ws) -> None:
+    """Style a worksheet as a bordered table whose first row is the header.
+    Row 1 gets a grey fill, bold centered text and a border. Every body cell gets
+    a border, then columns A..K receive their alignment and columns B, H, I, J
+    their number format. Column widths, header height, frozen header row and an
+    auto-filter over the used range are set last.
+    Args:
+        ws: openpyxl worksheet to style in place.
+    """
+    edge = Side(style="thin")
+    box = Border(left=edge, right=edge, top=edge, bottom=edge)
+    left = Alignment(horizontal="left", vertical="center")
+    centered = Alignment(horizontal="center", vertical="center")
+    right = Alignment(horizontal="right", vertical="center")
+    last_row = ws.max_row
+    last_col = ws.max_column
+    # Header style (row 1)
+    header_fill = PatternFill("solid", fgColor="E6E6E6")
+    header_font = Font(bold=True)
+    header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
+    for col_idx in range(1, last_col + 1):
+        head = ws.cell(row=1, column=col_idx)
+        head.fill = header_fill
+        head.font = header_font
+        head.alignment = header_alignment
+        head.border = box
+    # Per-column alignment (A..K) and number formats
+    column_alignment = {
+        "A": centered,
+        "B": centered,
+        "C": centered,
+        "D": centered,
+        "E": centered,
+        "F": centered,
+        "G": left,
+        "H": right,
+        "I": right,
+        "J": centered,
+        "K": centered,
+    }
+    number_formats = {
+        "B": "dd/mm/yyyy hh:mm:ss",  # Jour: datetime
+        "J": "dd/mm/yyyy",  # due date: date
+        "H": "#,##0.00",  # Débit
+        "I": "#,##0.00",  # Crédit
+    }
+    for row_idx in range(2, last_row + 1):
+        # Default for every used cell of the row: border + left alignment
+        for col_idx in range(1, last_col + 1):
+            body = ws.cell(row=row_idx, column=col_idx)
+            body.border = box
+            body.alignment = left
+        for letter, alignment in column_alignment.items():
+            ws[f"{letter}{row_idx}"].alignment = alignment
+        for letter, fmt in number_formats.items():
+            ws[f"{letter}{row_idx}"].number_format = fmt
+    # Column widths (adjust if needed)
+    widths = {
+        "A": 10,
+        "B": 20,
+        "C": 8,
+        "D": 10,
+        "E": 14,
+        "F": 16,
+        "G": 40,
+        "H": 12,
+        "I": 12,
+        "J": 16,
+        "K": 20,
+    }
+    for letter, width in widths.items():
+        ws.column_dimensions[letter].width = width
+    # Header height
+    ws.row_dimensions[1].height = 20
+    # Freeze the header row and enable filtering on the used range
+    ws.freeze_panes = "A2"
+    ws.auto_filter.ref = f"A1:{get_column_letter(last_col)}{last_row}"
+def write_xlsx_many(
+    rows_all: list[list[Any]],
+    output_path: str,
+    *,
+    sheet_name: str = "EBIA",
+    include_headers: bool = True,
+) -> None:
+    """Write rows to a single-sheet workbook and save it.
+    Args:
+        rows_all: Rows to append, each a list of cell values in HEADERS order.
+        output_path: Destination .xlsx file. Missing parent folders are created.
+        sheet_name: Title of the worksheet.
+        include_headers: When True, HEADERS is written as the first row and the
+            sheet is styled as a table; when False, only the raw rows are written.
+    """
+    wb = Workbook()
+    ws = wb.active
+    ws.title = sheet_name
+    # Header
+    if include_headers:
+        ws.append(HEADERS)
+    # Data
+    for r in rows_all:
+        ws.append(r)
+    # Table styling comes last so it covers every row. It assumes row 1 is the
+    # header, so it is skipped when no header row was written.
+    if include_headers:
+        style_as_table(ws)
+    # Saving into a folder that does not exist yet would fail; create it first,
+    # as folder mode does for its output directory.
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+    wb.save(output_path)
+def generate_xlsx_single(
+    invoice_dict: dict,
+    output_path: str,
+    *,
+    piece: str,
+    document: str,
+    tva_rate: float = 0.20,
+    include_headers: bool = True,
+) -> None:
+    """Write the three accounting rows of one invoice to an .xlsx file.
+    Args:
+        invoice_dict: Dict with the keys "Client", "date" ("YYYY-MM-DD") and
+            "total_ttc".
+        output_path: Destination .xlsx file.
+        piece: Value of the "Pièce" column.
+        document: Value of the "Document" column.
+        tva_rate: VAT rate used to split the TTC total.
+        include_headers: Whether to write the header row.
+    Raises:
+        ValueError: If "Client" or "date" is missing or blank, or "total_ttc" is
+            None.
+    """
+    client_name = (invoice_dict.get("Client") or "").strip()
+    iso_day = (invoice_dict.get("date") or "").strip()
+    amount = invoice_dict.get("total_ttc")
+    # Checked in this order; the first failing check decides the error message
+    checks = (
+        (not client_name, "Missing 'Client'"),
+        (not iso_day, "Missing 'date' (expected YYYY-MM-DD)"),
+        (amount is None, "Missing 'total_ttc'"),
+    )
+    for failed, message in checks:
+        if failed:
+            raise ValueError(message)
+    invoice = Invoice(client=client_name, date_iso=iso_day, total_ttc=float(amount))
+    write_xlsx_many(
+        invoice_to_rows(invoice, piece=piece, document=document, statut="", tva_rate=tva_rate),
+        output_path,
+        include_headers=include_headers,
+    )
+def generate_xlsx_by_month_day(
+    invoice_dicts: list[dict],
+    output_dir: str,
+    *,
+    start_piece: int = 1,
+    start_document: int = 1,
+    piece_width: int = 4,
+    document_width: int = 5,
+    tva_rate: float = 0.20,
+    include_headers: bool = True,
+) -> list[str]:
+    """Write one ``YYYY-MM.xlsx`` workbook per month, with one sheet per day.
+    Invoices are sorted by date before numbering, so the piece and document
+    counters follow chronological order across all months. Dicts with a missing
+    or blank "Client" or "date", or a None "total_ttc", are skipped silently.
+    Args:
+        invoice_dicts: Dicts with the keys "Client", "date" ("YYYY-MM-DD") and
+            "total_ttc".
+        output_dir: Directory receiving the workbooks; created when missing.
+        start_piece: First "Pièce" number.
+        start_document: First "Document" number.
+        piece_width: Zero-fill width of the "Pièce" number.
+        document_width: Zero-fill width of the "Document" number.
+        tva_rate: VAT rate used to split each TTC total.
+        include_headers: Whether each sheet gets a header row and table styling.
+    Returns:
+        The paths of the generated files, in chronological order.
+    """
+    # 1. Keep only complete invoices, paired with their parsed date
+    dated: list[tuple[Invoice, date]] = []
+    for raw in invoice_dicts:
+        client = (raw.get("Client") or "").strip()
+        date_iso = (raw.get("date") or "").strip()
+        total_ttc = raw.get("total_ttc")
+        if client and date_iso and total_ttc is not None:
+            invoice = Invoice(client=client, date_iso=date_iso, total_ttc=float(total_ttc))
+            dated.append((invoice, _parse_iso_date(date_iso)))
+    dated.sort(key=lambda pair: pair[1])
+    # 2. Number sequentially in date order and group by (year, month), then day
+    months: dict[tuple[int, int], dict[int, list[tuple[Invoice, str, str]]]] = defaultdict(
+        lambda: defaultdict(list)
+    )
+    for offset, (invoice, when) in enumerate(dated):
+        piece = _zfill_int(start_piece + offset, piece_width)
+        document = _zfill_int(start_document + offset, document_width)
+        months[(when.year, when.month)][when.day].append((invoice, piece, document))
+    # 3. One workbook per month, one sheet per day
+    target = Path(output_dir)
+    target.mkdir(parents=True, exist_ok=True)
+    written: list[str] = []
+    for year, month in sorted(months):
+        per_day = months[(year, month)]
+        book = Workbook()
+        book.remove(book.active)  # drop the default empty sheet
+        for day_number in sorted(per_day):
+            sheet = book.create_sheet(title=f"{day_number:02d}")
+            if include_headers:
+                sheet.append(HEADERS)
+            for invoice, piece, document in per_day[day_number]:
+                for row in invoice_to_rows(
+                    invoice, piece=piece, document=document, tva_rate=tva_rate
+                ):
+                    sheet.append(row)
+            if include_headers and sheet.max_row >= 1 and sheet.max_column >= 1:
+                style_as_table(sheet)
+        destination = str(target / f"{year}-{month:02d}.xlsx")
+        book.save(destination)
+        written.append(destination)
+    return written

ebia-0.1.4.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,147 @@
+Metadata-Version: 2.4
+Name: ebia
+Version: 0.1.4
+Summary: EquipeBaie Invoice Automation
+Author: AAH
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: pdfplumber
+Requires-Dist: openpyxl
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: pytest-cov>=5.0; extra == "dev"
+Requires-Dist: pytest-mock>=3.12; extra == "dev"
+Requires-Dist: ruff>=0.4; extra == "dev"
+Requires-Dist: mypy>=1.10; extra == "dev"
+# EquipeBaie_Freelance-project
+Invoicing software that generates invoices and automatically syncs them to an accounting tool.
+# EquipeBaie Invoice Automation (equipebaie-tools)
+This project aims to build a Python software tool that:
+1. Detects new invoices (PDF) in a folder
+2. Parses invoices and extracts required fields (EquipeBaie requirements)
+3. Processes/validates the extracted data
+4. Generates Excel reports (`.xlsx`): one workbook **per month**, with one sheet **per day**
+At the moment, the package contains the **PDF parser**, the **Excel generator**, and the `ebia` command-line interface.
+---
+## Current Status
+**Implemented:** PDF parsing module + Excel generation (single PDF or folder, via the `ebia` CLI)
+**Next steps:** folder watcher (auto-detect new invoices) + pipeline orchestration
+---
+## Project Structure (wheel-ready)
+equipebaie_tools/
+├─ pyproject.toml
+├─ README.md
+├─ src/
+│  └─ equipebaie_tools/
+│     ├─ __init__.py
+│     ├─ parser.py
+│     └─ cli.py
+└─ tests/
+   └─ test_import.py
+- `src/equipebaie_tools/` is the installable Python package
+- `parser.py` exposes the main function: `extract_invoice_fields(pdf_path)`
+---
+## Requirements
+- Python >= 3.10
+- `pip` up to date
+Recommended: use a virtual environment.
+---
+## Installation (Development / Editable)
+```bash
+## 1) Create a virtual environment
+**Linux/macOS**
+python -m venv .venv
+source .venv/bin/activate
+Windows (PowerShell)
+python -m venv .venv
+.\.venv\Scripts\Activate.ps1
+## 2) Install the package in editable mode
+pip install -U pip
+pip install -e .
+## Quickstart (Build, Install, Run)
+1) Clone the repository
+git clone https://github.com/Alamajdoub9/EquipeBaie_Freelance-project.git
+cd EquipeBaie_Freelance-project
+2) Create and activate a virtual environment
+Linux/macOS
+python3 -m venv .venv
+source .venv/bin/activate
+Windows (PowerShell)
+python -m venv .venv
+.\.venv\Scripts\Activate.ps1
+3) Install build tools and project dependencies
+python -m pip install --upgrade pip
+python -m pip install build wheel setuptools
+When you install the wheel (next steps), dependencies are installed automatically.
+4) Build the wheel (.whl)
+Run this command from the project root (where pyproject.toml exists):
+python -m build -w
+After a successful build, you should have a wheel in:
+ls -lh dist/
+5) Install the wheel
+pip install --force-reinstall dist/*.whl
+Offline alternative (reinstalls the wheel without fetching dependencies)
+pip install --force-reinstall --no-deps dist/ebia-0.1.4-py3-none-any.whl
+6) Run the CLI
+Parse a PDF invoice and write the extracted fields to `output.xlsx` (the default output file):
+ebia --path facture.pdf
+## Generate Excel (XLSX)
+The CLI `ebia` can generate an Excel file from:
+- a **single PDF invoice**, or
+- a **folder** containing multiple PDF invoices (3 rows per invoice, appended one after another).
+```bash
+1) Single PDF → one Excel file
+ebia --path "./invoices/facture.pdf" --out "./output/result.xlsx" --piece 0000 --document 00000
+2) Folder of PDFs → one Excel workbook per month (auto-increment piece/document)
+This mode reads all PDF files in the folder, parses each invoice, and writes 3 rows per invoice into the `YYYY-MM.xlsx` workbook of the invoice's month, on the sheet of the invoice's day. In this mode `--out` is the output directory (default: `./reports`).
+ebia --path "./invoices" --out "./output" --start-piece 1 --start-document 1

ebia-0.1.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+ebia/__init__.py,sha256=qnXyW6dMupNrvl1D0KsnAy3aUhjTfCv_b5AADlaXUxo,103
+ebia/cli.py,sha256=ekBmF8OuZ54QBJHNQBaJSwhyAInRXE5zIbHU2SIoMx4,3914
+ebia/parser.py,sha256=e3wwLezXk1iffc60Ds3NUw3BAHPE0HgcgveY-z4EVS4,3051
+ebia/xls_generator.py,sha256=MI1BbQDj7OEQqm2UfUpCHOsEOpG6O6n0bdVpCTKKyjA,8169
+ebia-0.1.4.dist-info/METADATA,sha256=TCnF8HsFQOURUq-2j55jRcVJb9nHoV0uMwY00af5lB0,3618
+ebia-0.1.4.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+ebia-0.1.4.dist-info/entry_points.txt,sha256=5jzavokGBKkGnP2rJMEFWEb3YCxQn0Y8nBBh4v9TFtY,39
+ebia-0.1.4.dist-info/top_level.txt,sha256=yg2ZFbd1qylefB1j4xl022tAfrot0s5cSyBF2UrVIeQ,5
+ebia-0.1.4.dist-info/RECORD,,

ebia-0.1.4.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

ebia-0.1.4.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ ebia = ebia.cli:main

ebia-0.1.4.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ ebia