PyPI - kahoot-to-anki - Versions diffs - 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl - Mend

kahoot-to-anki 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

kahoot_to_anki/__init__.py +1 -1
kahoot_to_anki/cli.py +127 -0
kahoot_to_anki/main.py +40 -0
kahoot_to_anki/processing.py +143 -0
kahoot_to_anki-1.2.0.dist-info/METADATA +79 -0
kahoot_to_anki-1.2.0.dist-info/RECORD +13 -0
kahoot_to_anki-1.2.0.dist-info/entry_points.txt +2 -0
tests/test_cli.py +111 -0
tests/test_processing.py +340 -0
kahoot_to_anki/converter.py +0 -254
kahoot_to_anki-1.0.0.dist-info/METADATA +0 -16
kahoot_to_anki-1.0.0.dist-info/RECORD +0 -10
kahoot_to_anki-1.0.0.dist-info/entry_points.txt +0 -2
tests/test_project.py +0 -226
{kahoot_to_anki-1.0.0.dist-info → kahoot_to_anki-1.2.0.dist-info}/WHEEL +0 -0
{kahoot_to_anki-1.0.0.dist-info → kahoot_to_anki-1.2.0.dist-info}/licenses/LICENSE +0 -0
{kahoot_to_anki-1.0.0.dist-info → kahoot_to_anki-1.2.0.dist-info}/top_level.txt +0 -0

tests/test_processing.py ADDED Viewed

@@ -0,0 +1,340 @@
+import logging
+import zipfile
+import pandas as pd
+from kahoot_to_anki.processing import get_questions, get_excels, get_excel_data, df_processing, make_anki
+logging.basicConfig(level=logging.DEBUG)
+KAHOOT_SHEET_NAME = "RawReportData Data"
+def write_excel(df: pd.DataFrame, tmp_path, filename="sample.xlsx", sheet_name=KAHOOT_SHEET_NAME):
+    path = tmp_path / filename
+    df.to_excel(path, sheet_name=sheet_name, index=False)
+    return path
+# --- get_questions ---
+def test_get_questions_single_file(tmp_path):
+    """Test processing a single valid Kahoot Excel file."""
+    df = pd.DataFrame({
+        "Question Number": [1],
+        "Question": ["What is 2+2?"],
+        "Answer 1": ["4"],
+        "Answer 2": ["3"],
+        "Answer 3": [""],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["4"]
+    })
+    excel_file = write_excel(df, tmp_path)
+    result_df = get_questions(input_directory=str(excel_file), sheet_name=KAHOOT_SHEET_NAME)
+    # Assertions
+    assert not result_df.empty
+    assert result_df.shape[0] == 1
+    assert "Question" in result_df.columns
+    assert result_df.iloc[0]["Question"] == "What is 2+2?"
+    expected_columns = ["Question", "Possible Answers", "Correct Answers"]
+    assert list(result_df.columns) == expected_columns
+def test_get_questions_deduplicates_questions(tmp_path):
+    """Test processing a single valid Kahoot Excel file with duplicated questions."""
+    df1 = pd.DataFrame({
+        "Question Number": [1],
+        "Question": ["What is 2+2?"],
+        "Answer 1": ["4"],
+        "Answer 2": ["3"],
+        "Answer 3": [""],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["4"]
+    })
+    df2 = df1.copy()
+    df = pd.concat([df1, df2], ignore_index=True)
+    write_excel(df, tmp_path)
+    result_df = get_questions(input_directory=str(tmp_path), sheet_name=KAHOOT_SHEET_NAME)
+    # Assertions
+    assert result_df.shape[0] == 1
+def test_get_questions_handles_numeric_answers(tmp_path):
+    """Test processing a single valid Kahoot Excel file with mixed data types."""
+    df = pd.DataFrame({
+        "Question Number": [1],
+        "Question": ["What is 2+2?"],
+        "Answer 1": ["2"],
+        "Answer 2": [4],
+        "Answer 3": ["6"],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["4"]
+    })
+    write_excel(df, tmp_path)
+    result_df = get_questions(input_directory=str(tmp_path), sheet_name=KAHOOT_SHEET_NAME)
+    assert result_df.shape[0] == 1
+    assert "4" in result_df.iloc[0]["Possible Answers"]
+def test_get_questions_returns_empty_for_empty_file(tmp_path):
+    """Test that an empty Excel file returns an empty DataFrame."""
+    df = pd.DataFrame(columns=[
+        "Question Number", "Question", "Answer 1", "Answer 2", "Answer 3",
+        "Answer 4", "Answer 5", "Answer 6", "Correct Answers"
+    ])
+    write_excel(df, tmp_path)
+    result_df = get_questions(input_directory=str(tmp_path), sheet_name=KAHOOT_SHEET_NAME)
+    assert result_df.empty
+def test_get_questions_merges_multiple_files(tmp_path):
+    df1 = pd.DataFrame({
+        "Question Number": [1],
+        "Question": ["What is 2+2?"],
+        "Answer 1": ["4"],
+        "Answer 2": ["3"],
+        "Answer 3": [""],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["4"]
+    })
+    df2 = pd.DataFrame({
+        "Question Number": [2],
+        "Question": ["What is the capital of France?"],
+        "Answer 1": ["Berlin"],
+        "Answer 2": ["Paris"],
+        "Answer 3": ["Madrid"],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["Paris"]
+    })
+    # Write both files into the same temp directory
+    write_excel(df1, tmp_path, filename="quiz1.xlsx")
+    write_excel(df2, tmp_path, filename="quiz2.xlsx")
+    result_df = get_questions(input_directory=str(tmp_path), sheet_name=KAHOOT_SHEET_NAME)
+    assert result_df.shape[0] == 2
+    assert "What is 2+2?" in result_df["Question"].values
+    assert "What is the capital of France?" in result_df["Question"].values
+def test_get_questions_skips_invalid_sheets(tmp_path):
+    df1 = pd.DataFrame({
+        "Question Number": [1],
+        "Question": ["What is 2+2?"],
+        "Answer 1": ["4"],
+        "Answer 2": ["3"],
+        "Answer 3": [""],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["4"]
+    })
+    df2 = pd.DataFrame({
+        "Question Number": [2],
+        "Question": ["What is the capital of France?"],
+        "Answer 1": ["Berlin"],
+        "Answer 2": ["Paris"],
+        "Answer 3": ["Madrid"],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["Paris"]
+    })
+    df3 = pd.DataFrame({
+        "Question Number": [2],
+        "Question": ["What is the capital of Spain?"],
+        "Answer 1": ["Berlin"],
+        "Answer 2": ["Paris"],
+        "Answer 3": ["Madrid"],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["Madrid"]
+    })
+    # Write both files into the same temp directory
+    write_excel(df1, tmp_path, filename="quiz1.xlsx")
+    write_excel(df2, tmp_path, filename="quiz2.xlsx")
+    write_excel(df3, tmp_path, filename="quiz3.xlsx", sheet_name="TEST")
+    result_df = get_questions(input_directory=str(tmp_path), sheet_name=KAHOOT_SHEET_NAME)
+    assert result_df.shape[0] == 2
+    assert "What is 2+2?" in result_df["Question"].values
+    assert "What is the capital of France?" in result_df["Question"].values
+# --- get_excels ---
+def test_get_excels_single_file(tmp_path):
+    """Test that get_excels yields a single file when given a single .xlsx file path."""
+    file = tmp_path / "single.xlsx"
+    file.write_text("dummy content")
+    result = list(get_excels(str(file)))
+    assert len(result) == 1
+    assert result[0].endswith("single.xlsx")
+def test_get_excels_multiple_files_in_directory(tmp_path):
+    """Test that get_excels yields all .xlsx files in a directory."""
+    file1 = tmp_path / "file1.xlsx"
+    file2 = tmp_path / "file2.xlsx"
+    file3 = tmp_path / "file3.csv"  # should be ignored
+    for f in [file1, file2]:
+        f.write_text("dummy content")
+    file3.write_text("should be ignored")
+    result = list(get_excels(str(tmp_path)))
+    assert len(result) == 2
+    assert all(f.endswith(".xlsx") for f in result)
+    assert str(file3) not in result
+# --- get_excel_data ---
+def test_get_excel_data_valid(tmp_path):
+    """Test reading a valid Excel file with correct sheet name."""
+    df = pd.DataFrame({"Question": ["Q1"], "Correct Answers": ["A1"]})
+    path = tmp_path / "valid.xlsx"
+    df.to_excel(path, sheet_name=KAHOOT_SHEET_NAME, index=False)
+    result = get_excel_data(str(path), sheet_name=KAHOOT_SHEET_NAME)
+    assert isinstance(result, pd.DataFrame)
+    assert not result.empty
+    assert "Question" in result.columns
+def test_get_excel_data_missing_sheet(tmp_path, caplog):
+    """Test behavior when the specified sheet name does not exist."""
+    df = pd.DataFrame({"Question": ["Q1"]})
+    path = tmp_path / "missing_sheet.xlsx"
+    df.to_excel(path, sheet_name="WrongSheet", index=False)
+    with caplog.at_level(logging.WARNING):
+        result = get_excel_data(str(path), sheet_name=KAHOOT_SHEET_NAME)
+    assert result is None
+    assert "Skipping file" in caplog.text
+def test_get_excel_data_invalid_file(tmp_path, caplog):
+    """Test behavior when trying to read a corrupted Excel file."""
+    path = tmp_path / "fake.xlsx"
+    path.write_text("This is not a real Excel file.")
+    with caplog.at_level(logging.WARNING):
+        result = get_excel_data(str(path), sheet_name=KAHOOT_SHEET_NAME)
+    assert result is None
+    assert "Skipping file" in caplog.text
+# --- df_processing ---
+def test_df_processing_empty():
+    df = pd.DataFrame(columns=[
+        "Question Number", "Question", "Answer 1", "Answer 2", "Answer 3",
+        "Answer 4", "Answer 5", "Answer 6", "Correct Answers"
+    ])
+    result = df_processing(df)
+    assert result.empty
+    assert list(result.columns) == ["Question", "Possible Answers", "Correct Answers"]
+def test_df_processing_normal_case():
+    df = pd.DataFrame({
+        "Question Number": [1],
+        "Question": ["What is 2+2?"],
+        "Answer 1": ["2"],
+        "Answer 2": ["4"],
+        "Answer 3": ["3"],
+        "Answer 4": [""],
+        "Answer 5": [""],
+        "Answer 6": [""],
+        "Correct Answers": ["4"]
+    })
+    result = df_processing(df)
+    assert result.shape[0] == 1
+    assert "What is 2+2?" in result["Question"].values
+    assert "4" in result["Possible Answers"].iloc[0]
+def test_df_processing_duplicate_question_number():
+    df = pd.DataFrame({
+        "Question Number": [1, 1],
+        "Question": ["What is 2+2?", "What is 2+2?"],
+        "Answer 1": ["2", "2"],
+        "Answer 2": ["4", "4"],
+        "Answer 3": ["3", "3"],
+        "Answer 4": ["", ""],
+        "Answer 5": ["", ""],
+        "Answer 6": ["", ""],
+        "Correct Answers": ["4", "4"]
+    })
+    result = df_processing(df)
+    assert result.shape[0] == 1
+def test_df_processing_mixed_types():
+    df = pd.DataFrame({
+        "Question Number": [1],
+        "Question": ["How many continents?"],
+        "Answer 1": ["7"],
+        "Answer 2": [6],  # int type
+        "Answer 3": [""],
+        "Answer 4": [None],
+        "Answer 5": ["Five"],
+        "Answer 6": [""],
+        "Correct Answers": ["7"]
+    })
+    result = df_processing(df)
+    assert result.shape[0] == 1
+    assert "6" in result["Possible Answers"].iloc[0]  # check that int was converted
+    assert "None" not in result["Possible Answers"].iloc[0]  # check fillna
+# --- make_anki ---
+def test_make_anki_creates_apkg(tmp_path):
+    """Test that make_anki creates a valid .apkg file."""
+    # Prepare test data
+    df = pd.DataFrame({
+        "Question": ["What is 2+2?"],
+        "Possible Answers": ["2<br>3<br>4"],
+        "Correct Answers": ["4"]
+    })
+    # Run the function
+    make_anki(df=df, out=str(tmp_path), title="Test Deck")
+    # Check output file exists
+    output_file = tmp_path / "anki.apkg"
+    assert output_file.exists()
+    assert output_file.stat().st_size > 0
+    # Check that it's a valid ZIP file (as .apkg is zip format internally)
+    assert zipfile.is_zipfile(output_file)

kahoot_to_anki/converter.py DELETED Viewed

@@ -1,254 +0,0 @@
-# Standard library imports
-import argparse
-import logging
-import os
-import glob
-# Third-party library imports
-import genanki
-import pandas as pd
-# Configure logging settings
-logging.basicConfig(level=logging.INFO)
-# Constants
-DEFAULT_INPUT_DIRECTORY = "./data"
-DEFAULT_OUTPUT_DIRECTORY = "./"
-DEFAULT_DECK_TITLE = "Kahoot"
-KAHOOT_EXCEL_SHEET_NAME_RAW_DATA = "RawReportData Data"
-def main():
-    # Check command line arguments
-    inp, out, csv, title = get_commandline_arguments()
-    validation(inp, out)
-    df = get_questions(inp)
-    if csv:
-        df.to_csv(
-            os.path.join(out, "kahoot.csv"),
-            sep=";",
-            index=False,
-            encoding="utf-8-sig",
-        )
-    make_anki(df, out, title)
-def get_commandline_arguments() -> tuple:
-    """
-    Parses the command line arguments and returns a tuple with the input path, output path, CSV option, and deck title.
-    :return: A tuple with the input path, the output path, the csv and the title of the anki deck
-    :rtype: tuple
-    """
-    parser = argparse.ArgumentParser(description="Create Anki Deck from Kahoot answer")
-    parser.add_argument(
-        "-i",
-        "--inp",
-        default=DEFAULT_INPUT_DIRECTORY,
-        help=f"Path to the directory containing input Excel files or a single input Excel file. If a directory is "
-        f"provided, all Excel files in the directory will be processed. Default: {DEFAULT_INPUT_DIRECTORY}",
-        type=str,
-    )
-    parser.add_argument(
-        "-o",
-        "--out",
-        default=DEFAULT_OUTPUT_DIRECTORY,
-        help="Path to the directory where the Anki flashcards package will be generated. "
-        "If not specified, the package will be created in the current working directory.",
-        type=str,
-    )
-    parser.add_argument(
-        "--csv",
-        action="store_true",
-        help="Generate a CSV file with the question data.",
-    )
-    parser.add_argument(
-        "-t",
-        "--title",
-        default=DEFAULT_DECK_TITLE,
-        help="Name of the Anki deck to be created. "
-        f"If not specified, the default deck name '{DEFAULT_DECK_TITLE}' will be used.",
-        type=str,
-    )
-    args = parser.parse_args()
-    # return absolute paths
-    return os.path.abspath(args.inp), os.path.abspath(args.out), args.csv, args.title
-def validation(input_directory: str, output_directory: str) -> None:
-    """
-    This function validates the command line arguments, checking if the input path is a valid Excel file or directory
-    and if the output path is a valid directory.
-    The input path needs to be an Excel file or a directory that contains Excel files.
-    The output path needs to be a directory and not a file.
-    :param input_directory: The path of the input Excel or directory
-    :param output_directory: The path of the output directory
-    :return: None
-    :rtype: None
-    """
-    # Check if input is a file
-    if not os.path.exists(input_directory):
-        logging.error(f"Input directory {input_directory} does not exist!")
-        raise FileNotFoundError(f"Input directory {input_directory} does not exist!")
-    elif (
-        os.path.isfile(input_directory)
-        and os.path.splitext(input_directory)[-1] != ".xlsx"
-    ):
-        logging.error("Input file is not an excel file!")
-        raise ValueError("Input file is not an excel file!")
-    elif os.path.isdir(input_directory):
-        input_excels = os.path.join(input_directory, "*.xlsx")
-        if not glob.glob(input_excels):
-            logging.error("Input directory does not contain any excel files!")
-            raise FileNotFoundError("Input directory does not contain any excel files!")
-    # Check output directory and create when not existing
-    if not os.path.isdir(output_directory):
-        logging.error("Output is not a directory!")
-        raise ValueError("Output is not a directory!")
-    if not os.path.exists(output_directory):
-        try:
-            os.makedirs(output_directory)
-        except OSError as e:
-            logging.error(
-                "Failed to create output directory '%s': %s", output_directory, str(e)
-            )
-            raise
-def get_questions(input_directory: str) -> pd.DataFrame:
-    """
-    Extracts all the kahoot questions out of the Excel file(s)
-    :param input_directory: The path to the input directory or Excel file
-    :return: All the questions with the possible answers and the solution
-    :rtype: pd.DataFrame
-    """
-    def get_excels(path):
-        """
-        Returns a list with all Excels in the given path
-        :param path: the path to an Excel file or a directory with Excel files
-        :return: a list with all excels
-        """
-        if os.path.isfile(path):
-            yield path
-        else:
-            yield from glob.glob(os.path.join(input_directory, "*.xlsx"))
-            return [f for f in glob.glob(os.path.join(path, "*.xlsx"))]
-    def get_excel_data(excel_file: str) -> pd.DataFrame:
-        """
-        Returns a pd.DataFrame with the kahoot raw data
-        :param excel_file: an Excel file with Kahoot raw data
-        :return: a DataFrame with the data
-        """
-        try:
-            # read file
-            return pd.read_excel(
-                excel_file, sheet_name=KAHOOT_EXCEL_SHEET_NAME_RAW_DATA
-            )
-        except ValueError:
-            logging.warning(
-                "Skipping file '%s' as it is not a valid Excel file.", excel_file
-            )
-            return None
-        except Exception as e:
-            logging.error("Failed to read file '%s': %s", excel_file, str(e))
-            return None
-    def df_processing(data: pd.DataFrame) -> pd.DataFrame:
-        """
-        Processes the Kahoot question data.
-        :param data: DataFrame with Kahoot question data
-        :return: Processed DataFrame
-        """
-        # delete duplicated questions
-        data = data.drop_duplicates(subset=["Question Number"])
-        data = data.fillna("")
-        data["Possible Answers"] = data[
-            ["Answer 1", "Answer 2", "Answer 3", "Answer 4", "Answer 5", "Answer 6"]
-        ].agg("<br>".join, axis=1)
-        # keep only needed columns
-        data = data[["Question", "Possible Answers", "Correct Answers"]]
-        return data
-    out = pd.DataFrame(columns=["Question", "Possible Answers", "Correct Answers"])
-    questions_cnt = 0
-    files_cnt = 0
-    for file in get_excels(input_directory):
-        df = get_excel_data(file)
-        if df is None:
-            continue
-        files_cnt += 1
-        df = df_processing(df)
-        # add to out dataframe
-        out = pd.concat([out, df], axis=0, ignore_index=True)
-        questions_cnt += len(df)
-    logging.info("Read input files: %d", files_cnt)
-    logging.info("Read questions: %d", questions_cnt)
-    out = out.drop_duplicates(subset=["Question"])
-    return out
-def make_anki(df: pd.DataFrame, out: str, title: str) -> None:
-    """
-    Creates an Anki deck from the given Kahoot questions
-    :param df: The kahoot questions in a pd.DataFrame
-    :param out: The path to the output directory
-    :param title: The title of the Anki deck
-    :return: None
-    """
-    my_model = genanki.Model(
-        1607392319,
-        "Simple Model",
-        fields=[
-            {"name": "Question"},
-            {"name": "Answer"},
-            {"name": "selects"},
-        ],
-        templates=[
-            {
-                "name": "Card 1",
-                "qfmt": "{{Question}}<br><br>{{selects}}",
-                "afmt": '{{FrontSide}}<hr id="answer">{{Answer}}',
-            },
-        ],
-    )
-    my_deck = genanki.Deck(2059400110, title)
-    for index, row in df.iterrows():
-        my_note = genanki.Note(
-            model=my_model,
-            fields=[row["Question"], row["Correct Answers"], row["Possible Answers"]],
-        )
-        my_deck.add_note(my_note)
-    try:
-        genanki.Package(my_deck).write_to_file(
-            os.path.join(out, "anki.apkg"),
-        )
-    except Exception as e:
-        logging.error("Failed to write Anki package file: %s", str(e))
-if __name__ == "__main__":
-    main()

kahoot_to_anki-1.0.0.dist-info/METADATA DELETED Viewed

@@ -1,16 +0,0 @@
-Metadata-Version: 2.4
-Name: kahoot-to-anki
-Version: 1.0.0
-Summary: CLI tool to convert Kahoot quiz reports into Anki flashcards
-Author: Simon Hardmeier
-License-Expression: MIT
-Requires-Python: >=3.8
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: genanki
-Requires-Dist: pandas
-Requires-Dist: openpyxl
-Dynamic: license-file
-# kahoot-to-anki
-> python cli program to convert Kahoot quiz results to Anki flashcards

kahoot_to_anki-1.0.0.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-kahoot_to_anki/__init__.py,sha256=pquCLjCONKW_xGgL78_TyYqF9PQs7Ws2Tb5KeKCOPE4,67
-kahoot_to_anki/converter.py,sha256=hBjm4A8zcZeXK4dnXr7jojt-ntPaBdwJq68Nq_TYiko,8387
-kahoot_to_anki-1.0.0.dist-info/licenses/LICENSE,sha256=HxPBlT4sSfEgRBrX0jZd8WTfM0c31VFgnLaCEWzGMZc,1122
-tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/test_project.py,sha256=nGwU_Lys2QKJu3VrAnZiP1NS4ydEdkkqWPKQTCOZl8k,5355
-kahoot_to_anki-1.0.0.dist-info/METADATA,sha256=PCRUoo8tOnnuttRuZLv6o90T7YiLICaBjLTqc-26LaQ,457
-kahoot_to_anki-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-kahoot_to_anki-1.0.0.dist-info/entry_points.txt,sha256=Hp06_kNd2MJ0-ShO7KOI4TBreq0fqIVwx_a4i42C-sU,65
-kahoot_to_anki-1.0.0.dist-info/top_level.txt,sha256=aTMCk83rMZjWFZ556EHLIxVOgEawIWsMZzRpR3IPQ1w,21
-kahoot_to_anki-1.0.0.dist-info/RECORD,,

kahoot_to_anki-1.0.0.dist-info/entry_points.txt DELETED Viewed

@@ -1,2 +0,0 @@
-[console_scripts]
-kahoot-to-anki = kahoot_to_anki.converter:main

kahoot-to-anki 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

kahoot-to-anki 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl