PyPI - clip-protocol - Versions diffs - 2.2.14__tar.gz → 2.3__tar.gz - Mend

clip-protocol 2.2.14 (tar.gz) → 2.3 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

{clip_protocol-2.2.14 → clip_protocol-2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: clip_protocol
-Version: 2.2.14
+Version: 2.3
 Summary: Protocol to ensure the privatization of
 Project-URL: Homepage, https://github.com/martaajonees/Local_Privacy
 Project-URL: Issues, https://github.com/martaajonees/Local_Privacy/issues

clip_protocol-2.3/evaluation/AOI datasets/S1.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S10.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S11.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S12.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S13.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S14.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S15.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S16.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S17.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S18.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S19.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S2.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S20.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S3.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S4.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S5.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S6.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S7.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S8.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/S9.xlsx ADDED Viewed

Binary file

clip_protocol-2.3/evaluation/AOI datasets/process.py ADDED Viewed

@@ -0,0 +1,57 @@
+import pandas as pd
+import glob
+import os
+# Rewrite every .xlsx workbook found in the current working directory so that
+# it keeps only two columns, 'Participant' and 'AOI hit'. Each workbook is
+# overwritten in place.
+# Current working directory. NOTE(review): os.getcwd() is the directory the
+# process runs from, which is not necessarily the folder containing this script.
+current_folder = os.getcwd()
+# Collect every .xlsx file in that directory
+xlsx_files = glob.glob(os.path.join(current_folder, "*.xlsx"))
+for file_path in xlsx_files:
+    try:
+        # Read the workbook; if any column name starts with "Unnamed", read it
+        # again with header=1 so the second row is used as the header.
+        # NOTE(review): col.startswith assumes string column labels; a
+        # non-string label would raise and land in the except below.
+        df_temp = pd.read_excel(file_path)
+        if any(col.startswith("Unnamed") for col in df_temp.columns):
+            df = pd.read_excel(file_path, header=1)
+        else:
+            df = df_temp
+        # Clean up the column names: strip surrounding whitespace, and build a
+        # lower-cased copy so the lookups below are case-insensitive
+        df.columns = [str(c).strip() for c in df.columns]
+        cols_lower = [c.lower() for c in df.columns]
+        # Lower-cased names of the two columns we need
+        target_p = 'participant'
+        target_a = 'aoi hit'
+        if target_p in cols_lower and target_a in cols_lower:
+            # Recover the column names exactly as spelled in this particular file
+            real_col_p = df.columns[cols_lower.index(target_p)]
+            real_col_a = df.columns[cols_lower.index(target_a)]
+            # Use the file name without its extension as the participant name
+            participant_name = os.path.splitext(os.path.basename(file_path))[0]
+            # Overwrite the whole participant column with that name
+            df[real_col_p] = participant_name
+            # Keep only the two columns and drop rows whose AOI value is null
+            df = df[[real_col_p, real_col_a]]
+            df = df[df[real_col_a].notna()]
+            # Rename to the final desired column names
+            df.columns = ['Participant', 'AOI hit']
+            # Overwrite the original file
+            df.to_excel(file_path, index=False)
+            print(f"✅ Procesado: {os.path.basename(file_path)}")
+        else:
+            print(f"❌ Columnas no encontradas en {os.path.basename(file_path)}. Columnas detectadas: {list(df.columns)}")
+    # Any failure on one file is reported and the loop moves on to the next file
+    except Exception as e:
+        print(f"⚠️ Error procesando {os.path.basename(file_path)}: {e}")
+print("\nProceso finalizado.")

{clip_protocol-2.2.14 → clip_protocol-2.3}/evaluation/experiment_1.py RENAMED Viewed

@@ -24,38 +24,6 @@ def run_command(e, k, m, df, privacy_method):
     return compute_error_table(get_real_frequency(df), df_estimated, 2), df_estimated
-# Build a LaTeX figure (tikzpicture + axis environment) with one \addplot per
-# metric and write it to `path`.
-#   errors: mapping of metric name -> iterable of (epsilon, error) pairs
-#           (iterated with .items(); each pair is unpacked as (eps, err))
-#   path:   destination file, opened in "w" mode (existing content is replaced)
-def plot_latex(errors, path):
-    # Fixed opening part: figure environment and the axis options
-    lines = [
-        r"\begin{figure}[h]",
-        r"\centering",
-        r"\begin{tikzpicture}",
-        r"\begin{axis}[",
-        r"  xlabel={$\epsilon$}, ylabel={Error},",
-        r"  legend style={at={(0.5,-0.15)}, anchor=north,legend columns=-1},",
-        r"  xmin=0, grid=major, width=12cm, height=8cm,",
-        r"  cycle list name=color list,",
-        r"]"
-    ]
-    for metric, points in errors.items():
-        # The legend shows "Lp Norm" in place of the "Lρ Norm" metric name
-        name = "Lp Norm" if metric == "Lρ Norm" else metric
-        lines.append(r"\addplot coordinates {")
-        # One "(eps, err)" coordinate line per point, in sorted order
-        lines += [f"  ({eps}, {err})" for eps, err in sorted(points)]
-        lines.append(r"};")
-        lines.append(fr"\addlegendentry{{{name}}}")
-    # Fixed closing part: end of axis/picture, caption, end of figure
-    lines += [
-        r"\end{axis}",
-        r"\end{tikzpicture}",
-        r"\caption{Evolución del error por métrica en función del parámetro $\epsilon$}",
-        r"\end{figure}"
-    ]
-    # Write all collected lines, newline-separated, then report the output path
-    with open(path, "w") as f:
-        f.write("\n".join(lines))
-    print(f"✅ LaTeX graph saved to {path}")
 def run_experiment1(df, privacy_method):
     k = int(input("🔑 Enter k value: "))
     m = int(input("🔢 Enter m value: "))
@@ -65,13 +33,13 @@ def run_experiment1(df, privacy_method):
     epsilons = [round(e, 1) for e in list(reversed([x * 0.5 for x in range(1, 21)])) + [0.4, 0.3, 0.2, 0.1]]
     for eps in epsilons:
-        table, _ = run_command(eps, k, m, df, method)
+        print(f"Running with epsilon: {eps}")
+        table, _ = run_command(eps, k, m, df, privacy_method)
         for metric, val in table:
             error_history.setdefault(metric, []).append((eps, val))
     df = pd.DataFrame(error_history)
-    df.to_csv(f"figures/table_experiment_1_{privacy_method}.csv", index=False)
-    plot_latex(error_history, f"figures/experiment_1_{privacy_method}.tex")
+    df.to_csv(f"table_experiment_1_{privacy_method}.csv", index=False)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run experiment 1")
@@ -79,8 +47,8 @@ if __name__ == "__main__":
     args = parser.parse_args()
     distribution = input(" Enter the distribution 1/2/3/4: ")
-    pattern = f"aoi-hits-d{distribution}-5000"
+    pattern = f"SynLog-5000-d{distribution}"
     matching_files = [f for f in os.listdir(args.f) if pattern in f and f.endswith(".xlsx")]
     file_path = os.path.join(args.f, matching_files[0])

{clip_protocol-2.2.14 → clip_protocol-2.3}/evaluation/experiment_2.py RENAMED Viewed

@@ -115,7 +115,7 @@ def run_experiment_2(datasets_by_size, params):
                   "Iteraciones PCMeS", "Tiempo de ejecución PCMeS"]
     df_pivot = df_pivot[final_cols]
-    df_pivot.to_csv("figures/table_experiment_2.csv", index=False)
+    df_pivot.to_csv("table_experiment_2.csv", index=False)
 if __name__ == "__main__":
@@ -133,7 +133,8 @@ if __name__ == "__main__":
     datasets = {}
     for size in sizes:
-        pattern = f"aoi-hits-d{distribution}-{size}"
+        pattern = f"SynLog-{size}-d{distribution}"
         file_path = os.path.join(args.f, pattern + ".xlsx")
         header = 1 if "Unnamed" in pd.read_excel(file_path, nrows=1).columns[0] else 0
         df = pd.read_excel(file_path, header=header)

{clip_protocol-2.2.14 → clip_protocol-2.3}/evaluation/experiment_3.py RENAMED Viewed

@@ -66,7 +66,8 @@ def optimize_e(k, m, df, e_r, privacy_level, error_value, tolerance, privacy_met
 def run_experiment_3(datasets, params):
     error_value = 0.05
     tolerance = 0.01
-    privacy_level = "high"
+    # privacy_level = "high"
+    privacy_level = "low"
     for method in ["PCMeS", "PHCMS"]:
         row_apple = {"Método": "Método de Apple"}
@@ -89,7 +90,7 @@ def run_experiment_3(datasets, params):
             row_clip[size] = f"{epsilon:.2f} / {pe_error:.2f}"
         df_result = pd.DataFrame([row_apple, row_clip])
-        df_result.to_csv(f"figures/table_experiment_3_{method}.csv", index=False)
+        df_result.to_csv(f"table_experiment_3_{method}.csv", index=False)
 if __name__ == "__main__":
@@ -107,11 +108,11 @@ if __name__ == "__main__":
     datasets = {}
     for size in sizes:
-        pattern = f"aoi-hits-d{distribution}-{size}"
+        pattern = f"SynLog-{size}-d{distribution}"
         file_path = os.path.join(args.f, pattern + ".xlsx")
         header = 1 if "Unnamed" in pd.read_excel(file_path, nrows=1).columns[0] else 0
         df = pd.read_excel(file_path, header=header)
         datasets[size] = df
-    run_experiment_3(datasets, params)
+    run_experiment_3(datasets, params)

{clip_protocol-2.2.14 → clip_protocol-2.3}/evaluation/experiment_4.py RENAMED Viewed

@@ -84,7 +84,7 @@ def run_experiment_4(datasets, params):
             cleaned_table = [[col[0], col[1].replace('%', '') if isinstance(col[1], str) else col[1]] for col in filtered_table]
             error_by_aoi = pd.DataFrame(cleaned_table, columns=['AOI', 'Error'])
-            path_individual = f"figures/experimet_4_d{distribution}_{method}.csv"
+            path_individual = f"experimet_4_d{distribution}_{method}.csv"
             error_by_aoi.to_csv(path_individual, index=False)
 if __name__ == "__main__":
@@ -101,7 +101,7 @@ if __name__ == "__main__":
     datasets = {}
     for distribution in distributions:
-        pattern = f"aoi-hits-d{distribution}-5000"
+        pattern = f"SynLog-5000-d{distribution}"
         file_path = os.path.join(args.f, pattern + ".xlsx")
         header = 1 if "Unnamed" in pd.read_excel(file_path, nrows=1).columns[0] else 0
         df = pd.read_excel(file_path, header=header)

{clip_protocol-2.2.14 → clip_protocol-2.3}/evaluation/experiment_5.py RENAMED Viewed

@@ -149,10 +149,10 @@ def run_experiment_5(datasets, privatized_path):
             privatized_data.to_csv(filename, index=False)
     performance_df = pd.DataFrame(performance_records)
-    performance_df.to_csv("figures/experiment_5.csv", index=False)
+    performance_df.to_csv("experiment_5.csv", index=False)
     size_df = pd.DataFrame(size_comparison_records)
-    size_df.to_csv("figures/experiment_5_size_comparison.csv", index=False)
+    size_df.to_csv("experiment_5_size_comparison.csv", index=False)
 def load_excel_with_header_check(filepath):
     try:

{clip_protocol-2.2.14 → clip_protocol-2.3}/evaluation/figures/experiment_2_params.json RENAMED Viewed

@@ -6,7 +6,7 @@
   },
   "PHCMS": {
     "k": 145,
-    "m": 128,
+    "m": 256,
     "e_r": 8.0
   }
-}
+}

{clip_protocol-2.2.14 → clip_protocol-2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "clip_protocol"
-version = "2.2.14"
+version = "2.3"
 authors = [
   { name="Marta Jones González", email="martajon10@gmail.com" },
 ]

{clip_protocol-2.2.14 → clip_protocol-2.3}/src/clip_protocol/main/setup.py RENAMED Viewed

@@ -76,6 +76,7 @@ class Setup:
         self.df['value'] = self.df['value'].astype(str).apply(lambda x: x.strip())
         self.df = self.df[self.df['value'] != '-']
         self.df = self.df[self.df['value'].str.contains(r'\w', na=False)]
+        self.df = self.df.sample(frac=1, random_state=None).reset_index(drop=True)
         self.N = len(self.df)
     def run_command(self, e, k, m):

clip_protocol-2.2.14/evaluation/datasets-article/process.py DELETED Viewed

@@ -1,46 +0,0 @@
-import os
-import glob
-import re
-import pandas as pd
-import random
-import string
-import hashlib
-# Single ID written to every row: 5 random characters drawn from uppercase
-# ASCII letters and digits, generated once per run
-UNIQUE_USER_ID= ''.join(random.choices(string.ascii_uppercase + string.digits, k=5))
-# Find every xlsx file in the current directory
-files = glob.glob("*.xlsx")
-for file in files:
-    print(f"Procesando {file}...")
-    # Read the Excel file
-    df = pd.read_excel(file)
-    # 1) Replace every user_id with the single shared ID
-    if "user_id" in df.columns:
-        df["user_id"] = UNIQUE_USER_ID
-    else:
-        print(f"⚠️  No se encontró columna 'user_id' en {file}")
-    # 2) Rename the second column to 'events'
-    if len(df.columns) >= 2:
-        cols = list(df.columns)
-        cols[1] = "events"
-        df.columns = cols
-    else:
-        print(f"⚠️  El archivo {file} no tiene al menos 2 columnas")
-    # 3) Replace subevent_X with eX inside the events column
-    #    (values are converted to str first, then rewritten with a regex)
-    if "events" in df.columns:
-        df["events"] = df["events"].astype(str).apply(
-            lambda x: re.sub(r"subevent_(\d+)", r"e\1", x)
-        )
-    # Save, overwriting the original file
-    df.to_excel(file, index=False)
-print("✅ Procesamiento terminado.")