arttactic 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nafisa Sharif
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: arttactic
3
+ Version: 0.1.1
4
+ Summary: Processing library for art auction data
5
+ Author: Nafisa Sharif
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Nafisa Sharif
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ Classifier: Programming Language :: Python :: 3
28
+ Classifier: License :: OSI Approved :: MIT License
29
+ Classifier: Operating System :: OS Independent
30
+ Requires-Python: >=3.8
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE
33
+ Requires-Dist: pandas>=3.0.1
34
+ Requires-Dist: openpyxl>=3.1.5
35
+ Dynamic: license-file
36
+
37
+ # arttactic
38
+
39
+ A Python library for extracting and processing data from scraped art auction results. Currently designed to run on Linux or macOS, not yet tested on Windows.
40
+
41
+
42
+ ## Description
43
+
44
+ `arttactic` provides tools for working with art auction datasets, including:
45
+
46
+ * Cleaning and transforming auction data
47
+ * Applying buyer’s premium calculations
48
+ * Extracting mediums from lot information
49
+
50
+
51
+ ## Installation
52
+
53
+ (Recommended) In the terminal, create a virtual environment and activate it.
54
+
55
+ For example:
56
+
57
+ ```bash
58
+ python3 -m venv arttactic-venv
59
+ source arttactic-venv/bin/activate
60
+ ```
61
+
62
+ Then install the arttactic library:
63
+
64
+ ```bash
65
+ pip install arttactic
66
+ ```
67
+
68
+ ## Usage
69
+
70
+ From the terminal, run the following CLI command:
71
+
72
+ ```bash
73
+ run-arttactic
74
+ ```
75
+
76
+ This will launch a GUI that allows you to:
77
+ - Select a directory containing .csv files with scraped auction data,
78
+ - Select a single file you wish to add buyer's premiums to,
79
+ - Generate processed spreadsheets after selecting a directory,
80
+ - Generate processed spreadsheets after selecting a directory, and also find and save new mediums,
81
+ - Add buyer's premiums to a selected file.
82
+
83
+
84
+ ## Development
85
+
86
+ (Only relevant if you wish to edit the source code.)
87
+ To install locally in editable mode:
88
+
89
+ ```bash
90
+ pip install -e .
91
+ ```
@@ -0,0 +1,55 @@
1
+ # arttactic
2
+
3
+ A Python library for extracting and processing data from scraped art auction results. Currently designed to run on Linux or macOS, not yet tested on Windows.
4
+
5
+
6
+ ## Description
7
+
8
+ `arttactic` provides tools for working with art auction datasets, including:
9
+
10
+ * Cleaning and transforming auction data
11
+ * Applying buyer’s premium calculations
12
+ * Extracting mediums from lot information
13
+
14
+
15
+ ## Installation
16
+
17
+ (Recommended) In the terminal, create a virtual environment and activate it.
18
+
19
+ For example:
20
+
21
+ ```bash
22
+ python3 -m venv arttactic-venv
23
+ source arttactic-venv/bin/activate
24
+ ```
25
+
26
+ Then install the arttactic library:
27
+
28
+ ```bash
29
+ pip install arttactic
30
+ ```
31
+
32
+ ## Usage
33
+
34
+ From the terminal, run the following CLI command:
35
+
36
+ ```bash
37
+ run-arttactic
38
+ ```
39
+
40
+ This will launch a GUI that allows you to:
41
+ - Select a directory containing .csv files with scraped auction data,
42
+ - Select a single file you wish to add buyer's premiums to,
43
+ - Generate processed spreadsheets after selecting a directory,
44
+ - Generate processed spreadsheets after selecting a directory, and also find and save new mediums,
45
+ - Add buyer's premiums to a selected file.
46
+
47
+
48
+ ## Development
49
+
50
+ (Only relevant if you wish to edit the source code.)
51
+ To install locally in editable mode:
52
+
53
+ ```bash
54
+ pip install -e .
55
+ ```
File without changes
File without changes
@@ -0,0 +1,101 @@
1
+ import pandas as pd
2
+
3
+ from arttactic.variables.reg_exs import yyyy, yyyy_yy, yyyy_yyyy
4
+
5
+
6
+
7
+ def fill_auto_years(row: pd.Series, creation_terms: list, conceived_terms: list) -> pd.Series:
8
+ try:
9
+ info = row["Info (Auto)"]
10
+ row["Creation Year (Auto)"] = []
11
+ row["Conceived Year (Auto)"] = []
12
+ row["Flags (Auto)"] = []
13
+
14
+ birth_year = 0
15
+
16
+ if 'cast' in ','.join(info):
17
+ for substring in info:
18
+ substring_split = substring.split()
19
+ if 'cast' in substring:
20
+ if substring.endswith('s'):
21
+ row["Creation Year (Auto)"].append(substring[-5:-1])
22
+ elif yyyy.fullmatch(substring_split[-1]):
23
+ row["Creation Year (Auto)"].append(substring[-4:])
24
+ elif yyyy_yy.fullmatch(substring_split[-1]):
25
+ row["Creation Year (Auto)"].append(f"{substring_split[-1][:2]}{substring[-2:]}")
26
+ if 'dated' in substring:
27
+ row["Conceived Year (Auto)"].append(substring)
28
+
29
+ else:
30
+ for substring in info:
31
+ substring_split = substring.split()
32
+ if substring_split[0] in creation_terms:
33
+ if len(substring_split[-1]) != 4:
34
+ row["Conceived Year (Auto)"].append(substring)
35
+ if substring_split[-1].endswith('s'):
36
+ row["Creation Year (Auto)"].append(substring[-5:-1])
37
+ else:
38
+ if yyyy_yy.fullmatch(substring_split[-1]):
39
+ try:
40
+ int_test = int(row["Born"])
41
+ birth_year = (str(int_test))
42
+ except:
43
+ raise Exception("no birth year")
44
+ if birth_year != 0:
45
+ creation_year = substring_split[-1][5:]
46
+ century = int((birth_year)[:2])
47
+ try:
48
+ if int(creation_year) > int((birth_year)[2:]):
49
+ full_year = (f"{century}{creation_year}")
50
+ elif int(creation_year) < int((birth_year)[2:]):
51
+ century = century + 1
52
+ full_year = (f"{century}{creation_year}")
53
+ row["Creation Year (Auto)"].append(full_year)
54
+ except:
55
+ pass
56
+ elif yyyy_yyyy.fullmatch(substring_split[-1]):
57
+ row["Creation Year (Auto)"].append(substring[-4:])
58
+ else:
59
+ row["Creation Year (Auto)"].append(substring[-4:])
60
+
61
+ if substring_split[0] in conceived_terms:
62
+ row["Conceived Year (Auto)"].append(substring)
63
+
64
+ if 'circa' in substring:
65
+ row["Conceived Year (Auto)"].append(substring)
66
+
67
+ if (len(row["Creation Year (Auto)"]) > 1) and (len(set(row["Creation Year (Auto)"])) > 1):
68
+ row["Flags (Auto)"] = ["Multiple creation years in lot notes"]
69
+ try:
70
+ int_test = int(row["Creation Year"])
71
+ if str(row["Creation Year"]) in row["Creation Year (Auto)"]:
72
+ row["Creation Year (Auto)"] = row["Creation Year"]
73
+ else:
74
+ row["Creation Year (Auto)"] = max(row["Creation Year (Auto)"])
75
+ except:
76
+ row["Creation Year (Auto)"] = max(row["Creation Year (Auto)"])
77
+ else:
78
+ if row["Creation Year (Auto)"]:
79
+ row["Creation Year (Auto)"] = max(row["Creation Year (Auto)"])
80
+ except:
81
+ pass
82
+ return row
83
+
84
+ def add_flags(row: pd.Series) -> pd.Series:
85
+ if row["Info (Auto)"] and (len(row["Conceived Year (Auto)"]) == 0):
86
+ row["Flags (Auto)"].append("No conceived information in lot notes")
87
+
88
+ if not row["Info (Auto)"]:
89
+ row["Flags (Auto)"].append("No date information in lot notes")
90
+
91
+ elif row["Info (Auto)"] and (len(row["Creation Year (Auto)"]) == 0):
92
+ row["Flags (Auto)"].append("No creation year in lot notes")
93
+
94
+ elif str(row["Creation Year"]) != str(row["Creation Year (Auto)"]):
95
+ if pd.isnull(row["Creation Year"]):
96
+ row["Flags (Auto)"].append(f"Mismatched creation years: manual=None and auto={row['Creation Year (Auto)']}")
97
+ else:
98
+ row["Flags (Auto)"].append(f"Mismatched creation years: manual={row['Creation Year']} and auto={row['Creation Year (Auto)']}")
99
+
100
+ row["Flags (Auto)"] = ", ".join(row["Flags (Auto)"])
101
+ return row
@@ -0,0 +1,378 @@
1
+ import os
2
+ import sys
3
+ import pandas as pd
4
+ from datetime import datetime
5
+ from joblib import Parallel, delayed
6
+ from importlib import resources
7
+
8
+ from arttactic.utils.df_utils import find_and_concatenate_csvs, set_column_types, add_auto_columns
9
+ from arttactic.utils.string_utils import clean_string, clean_birth_years, clean_death_years, return_substrings, trim_substrings, clean_short_years, find_term_in_line, remove_duplicate_mediums
10
+ from arttactic.extraction import fill_auto_years, add_flags
11
+ from arttactic.premiums import add_premium_columns
12
+ from arttactic.variables.search_lists import search_terms, creation_terms, conceived_terms
13
+
14
+
15
+
16
+ def define_new_dataframe() -> pd.DataFrame:
17
+ template_df = pd.DataFrame(columns=[
18
+ "Database Id",
19
+ "Artist",
20
+ "Artist First Names",
21
+ "Artist Last Names",
22
+ "Artist Chinese Name",
23
+ "Born",
24
+ "Dead",
25
+ "-",
26
+ "Title",
27
+ "Creation Year",
28
+ "Conceived Year (if other than Creation Year)",
29
+ "Medium (Category)",
30
+ "Medium (Detailed)",
31
+ "Height (cm)",
32
+ "Width (cm)",
33
+ "Depth (cm)",
34
+ "Size (Detailed)",
35
+ "Orientation (if 2D work)",
36
+ "Sqm (if 2D work)",
37
+ "Edition No",
38
+ "Size of edition",
39
+ "Lot Notes (Detailed)",
40
+ "-",
41
+ "Low Estimate (USD)",
42
+ "High Estimate (USD)",
43
+ "Average Estimate (USD)",
44
+ "Hammer Price (USD)",
45
+ "Aggregate Price (USD)",
46
+ "Price/Average",
47
+ "-",
48
+ "$1m+",
49
+ "$1m+ Low Estimate",
50
+ "$1m+ Hammer",
51
+ "$1m+ Premium",
52
+ "-",
53
+ "Guarantees",
54
+ "Type of Guarantee",
55
+ "Return on Guarantee",
56
+ "-",
57
+ "Unsold Comment",
58
+ "Paddle No.",
59
+ "Buyer Type",
60
+ "Buyer Info",
61
+ "Underbidder Info",
62
+ "Online Interest",
63
+ "-",
64
+ "Auction House",
65
+ "Location",
66
+ "Region",
67
+ "Sale Date",
68
+ "Sale Month",
69
+ "Quarter",
70
+ "1H/2H",
71
+ "Sale Year",
72
+ "-",
73
+ "Lot Number",
74
+ "Lot Sequence",
75
+ "Sale Title",
76
+ "Sale Number",
77
+ "Sale URL",
78
+ "Market Sector",
79
+ "Type of Sale",
80
+ "Timing of Sale",
81
+ "Sale Format",
82
+ "Sale Subject",
83
+ "Name of Single Owner",
84
+ "Named Collector?",
85
+ "Name of Collector",
86
+ "African Market Sector",
87
+ "Italian Market Sector",
88
+ "Latin American Market Sector",
89
+ "MENA Market Sector",
90
+ "Old Master Market Sector",
91
+ "Photo Market Sector",
92
+ "-",
93
+ "Currency Conversion Rate",
94
+ "Currency Code",
95
+ "-",
96
+ "Estimate on Request",
97
+ "Low Estimate (OC)",
98
+ "High Estimate (OC)",
99
+ "Average Estimate (OC)",
100
+ "Hammer Price (OC)",
101
+ "Aggregate Price (OC)",
102
+ "Price/Average (OC)",
103
+ "-",
104
+ "Gender",
105
+ "Dead/Alive",
106
+ "Active Century",
107
+ "Nationality",
108
+ "Artist Region",
109
+ "-",
110
+ "Repeat Sale Id",
111
+ "Provenance",
112
+ "Auction House RS",
113
+ "City",
114
+ "Estimate",
115
+ "Previous Sale Value (US Premium)",
116
+ "Previous Sale Date",
117
+ "% Increase",
118
+ "Holding Period",
119
+ "CAGR",
120
+ "Previous Owners",
121
+ "Previous Sales",
122
+ "-",
123
+ "Exhibition History",
124
+ "Literature",
125
+ "-",
126
+ "Movement",
127
+ "Movement Category",
128
+ "Black American?",
129
+ "Under 45 at at Time of Sale",
130
+ "Fresh Paint",
131
+ "Continental Region",
132
+ "African Region",
133
+ "Hong Kong Region",
134
+ "OMP Region",
135
+ "Modifier",
136
+ "OMP Active Century",
137
+ "Latin American Era",
138
+ "MENA Era",
139
+ "Photo Era",
140
+ "CWOA",
141
+ "Individual/Duo/Group/Collaboration",
142
+ "Notes",
143
+ "-",
144
+ "Wet Paint",
145
+ "Charity Sale",
146
+ "Recipient",
147
+ "Type of Charity",
148
+ "Flipped",
149
+ "-",
150
+ "Series",
151
+ "Category",
152
+ "-",
153
+ "Blank 1",
154
+ "Blank 2",
155
+ "Blank 3",
156
+ "Blank 4",
157
+ "Blank 5",
158
+ "-",
159
+ "Lot url",
160
+ "-",
161
+ "Original Database ID",
162
+ "Previous Database ID",
163
+ "-",
164
+ "Upload Status"
165
+ ])
166
+ return template_df
167
+
168
+ def populate_new_df_from_old_scrapes(template_df: pd.DataFrame, old_scrapes_df: pd.DataFrame) -> pd.DataFrame:
169
+ # Populate columns with data from the concatenated files
170
+ new_df = template_df.copy()
171
+ new_df["Artist"] = old_scrapes_df["artist"]
172
+ new_df["Born"] = old_scrapes_df["artist_born"]
173
+ new_df["Artist_dates"] = old_scrapes_df["artist_dates"]
174
+ new_df["Dead"] = old_scrapes_df["artist_died"]
175
+ new_df["Auction House"] = old_scrapes_df["auction_house"]
176
+ new_df["Creation Year"] = old_scrapes_df["created"]
177
+ new_df ["Currency Code"] = old_scrapes_df["currency"]
178
+ new_df["Size (Detailed)"] = old_scrapes_df["dimensions"]
179
+ new_df["Guarantees"] = old_scrapes_df["guarantee"]
180
+ new_df["High Estimate (OC)"] = old_scrapes_df["high_estimate"]
181
+ new_df["Lot Notes (Detailed)"] = old_scrapes_df["lot_notes"]
182
+ new_df["Lot Number"] = old_scrapes_df["lot_number"]
183
+ new_df["Lot url"] = old_scrapes_df["lot_url"]
184
+ new_df["Low Estimate (OC)"] = old_scrapes_df["low_estimate"]
185
+ new_df["Medium (Detailed)"] = old_scrapes_df["medium"]
186
+ new_df["Underbidder Info"] = old_scrapes_df["number_of_bids"]
187
+ new_df["Aggregate Price (OC)"] = old_scrapes_df["price_realised"]
188
+ new_df["Provenance"] = old_scrapes_df["provenance"]
189
+ new_df["Sale Date"] = old_scrapes_df["sale_date"]
190
+ new_df["Location"] = old_scrapes_df["sale_location"]
191
+ new_df["Sale Number"] = old_scrapes_df["sale_number"]
192
+ new_df["Sale Title"] = old_scrapes_df["sale_title"]
193
+ new_df["Title"] = old_scrapes_df["title"]
194
+ return new_df
195
+
196
+ def populate_new_df_from_new_scrapes(template_df: pd.DataFrame, new_scrapes_df: pd.DataFrame) -> pd.DataFrame:
197
+ # Populate columns with data from the concatenated files
198
+ new_df = template_df.copy()
199
+ new_df["Artist"] = new_scrapes_df["Artist Name"]
200
+ new_df["Born"] = pd.Series()
201
+ new_df["Artist_dates"] = new_scrapes_df["Artist Dates"]
202
+ new_df["Dead"] = pd.Series()
203
+ new_df["Auction House"] = new_scrapes_df["Auction House"]
204
+ new_df["Creation Year"] = new_scrapes_df["Created Date"]
205
+ new_df ["Currency Code"] = new_scrapes_df["Currency"]
206
+ new_df["Size (Detailed)"] = new_scrapes_df["Dimensions"]
207
+ new_df["Guarantees"] = new_scrapes_df["Guarantee"]
208
+ new_df["High Estimate (OC)"] = new_scrapes_df["High Estimate"]
209
+ new_df["Lot Notes (Detailed)"] = new_scrapes_df["Lot Notes"]
210
+ new_df["Lot Number"] = new_scrapes_df["Lot Number"]
211
+ new_df["Lot url"] = new_scrapes_df["Lot URL"]
212
+ new_df["Low Estimate (OC)"] = new_scrapes_df["Low Estimate"]
213
+ new_df["Medium (Detailed)"] = new_scrapes_df["Medium"]
214
+ new_df["Aggregate Price (OC)"] = new_scrapes_df["Price Realised"]
215
+ new_df["Provenance"] = new_scrapes_df["Provenance"]
216
+ new_df["Sale Date"] = new_scrapes_df["Sale Date"]
217
+ new_df["Location"] = new_scrapes_df["Sale Location"]
218
+ if "Sale Number" in new_scrapes_df.columns:
219
+ new_df["Sale Number"] = new_scrapes_df["Sale Number"]
220
+ else:
221
+ new_df["Sale Number"] = pd.Series()
222
+ new_df["Sale Title"] = new_scrapes_df["Sale Name"]
223
+ new_df["Title"] = new_scrapes_df["Lot Name"]
224
+ return new_df
225
+
226
+
227
+ def prepare_df(data_path: str) -> pd.DataFrame:
228
+ os.chdir(data_path)
229
+ old_scrapes_df, new_scrapes_df = find_and_concatenate_csvs()
230
+ new_df = define_new_dataframe()
231
+ old_scrapes_mapped = pd.DataFrame()
232
+ new_scrapes_mapped = pd.DataFrame()
233
+ if len(old_scrapes_df) > 0:
234
+ old_scrapes_mapped = populate_new_df_from_old_scrapes(new_df, old_scrapes_df)
235
+ if len(new_scrapes_df) > 0:
236
+ new_scrapes_mapped = populate_new_df_from_new_scrapes(new_df, new_scrapes_df)
237
+ df = pd.concat([old_scrapes_mapped, new_scrapes_mapped], ignore_index=True)
238
+ df = set_column_types(df)
239
+ df = add_auto_columns(df)
240
+ df["Aggregate Price (OC)"] = df["Aggregate Price (OC)"].apply(lambda x: str(x).replace(",",""))
241
+ return df
242
+
243
+
244
+ def extract_info(df: pd.DataFrame, search_terms: list) -> pd.DataFrame:
245
+ for i in range(len(df)):
246
+ try:
247
+ df["Lot Notes (Auto)"][i].extend(clean_string(df["Lot Notes (Detailed)"][i]))
248
+ except Exception as e:
249
+ print("Exception cleaning lot notes: ", e)
250
+ print(i)
251
+ try:
252
+ int(df.loc[i,"Born"])
253
+ except:
254
+ try:
255
+ df["Born"].iloc[i] = (clean_birth_years(df["Artist_dates"].iloc[i]))
256
+ except Exception as e:
257
+ print("Exception extracting birth years: ", e)
258
+ print(i)
259
+ try:
260
+ int(df.loc[i,"Dead"])
261
+ except:
262
+ try:
263
+ df.loc[i,"Dead"] = (clean_death_years(df.loc[i,"Artist_dates"]))
264
+ except Exception as e:
265
+ print("Exception extracting death years: ", e)
266
+ print(i)
267
+ try:
268
+ #df.loc[i,"Info (Auto)"] = (return_substrings(df.loc[i,"Lot Notes (Auto)"], search_terms))
269
+ lot_notes_split = df["Lot Notes (Auto)"][i]
270
+ substrings = return_substrings(lot_notes_split, search_terms)
271
+ for sublist in substrings:
272
+ df["Info (Auto)"][i].extend(sublist)
273
+ except Exception as e:
274
+ print("Exception creating substrings: ", e)
275
+ print(i)
276
+ try:
277
+ trimmed = (trim_substrings(df["Info (Auto)"][i]))
278
+ df["Info (Auto)"][i].extend(trimmed)
279
+ except Exception as e:
280
+ print("Exception trimming substrings: ", e)
281
+ print(i)
282
+ try:
283
+ short_years_cleaned = clean_short_years(df.iloc[i])
284
+ df["Info (Auto)"][i].extend(short_years_cleaned)
285
+ except Exception as e:
286
+ print("Exception cleaning short years: ", e)
287
+ print(i)
288
+ df["Lot Notes (Auto)"] = df["Lot Notes (Auto)"].apply(lambda x : " ".join(x))
289
+ return df
290
+
291
+
292
+ def extract_mediums(df: pd.DataFrame, data_path: str, find_new: bool) -> pd.DataFrame:
293
+ os.chdir(sys.path[0])
294
+ with resources.files("arttactic").joinpath("data/Medium_List.xlsx").open("rb") as f:
295
+ medium_data = pd.read_excel(f)
296
+
297
+ medium_list = medium_data["Medium (Detailed)"]
298
+ new_mediums = []
299
+ for i in range(len(df)):
300
+ df["Medium (Auto)"][i] = []
301
+ if type(df.loc[i, "Medium (Detailed)"]) == str:
302
+ df["Medium (Auto)"][i] = str(df.loc[i, "Medium (Detailed)"])
303
+ else:
304
+ if type(df["Lot Notes (Detailed)"].iloc[i]) == str:
305
+ split_lines = df["Lot Notes (Detailed)"].iloc[i].splitlines()
306
+ for line in split_lines:
307
+ if any(word in line for word in search_terms) or ("grateful" in line) or ("painter" in line) or ("florentine" in line.lower()) or (line in str(df["Title"].iloc[i])) or (line in str(df["Artist"].iloc[i])):
308
+ pass
309
+ elif line in medium_list:
310
+ df["Medium (Auto)"].iloc[i].append(line)
311
+ else:
312
+ if find_new:
313
+ df["Medium (Auto)"].iloc[i] = []
314
+ cleaned_line = " ".join(clean_string(line))
315
+ result = Parallel(n_jobs=8)(delayed(find_term_in_line)(x, cleaned_line) for x in medium_list)
316
+ result_list = list(result)
317
+ if len(set(result_list)) > 1:
318
+ df["Medium (Auto)"].iloc[i].append(cleaned_line)
319
+ new_mediums.append(cleaned_line)
320
+ if df["Medium (Auto)"].iloc[i] == []:
321
+ cleaned_string = " ".join(clean_string(df["Lot Notes (Detailed)"].iloc[i]))
322
+ for medium in medium_list:
323
+ if medium.lower() in cleaned_string:
324
+ df["Medium (Auto)"].iloc[i].append(medium.lower())
325
+ df["Medium (Auto)"].iloc[i] = remove_duplicate_mediums(df["Medium (Auto)"].iloc[i])
326
+ df["Medium (Auto)"].iloc[i] = ", ".join(df["Medium (Auto)"].iloc[i])
327
+ if len(new_mediums) > 0:
328
+ new_mediums = list(set(new_mediums))
329
+ new_medium_df = pd.DataFrame({"Medium (Detailed)" : new_mediums})
330
+ combined = pd.concat([medium_data, new_medium_df])
331
+ combined = combined.drop_duplicates()
332
+ combined.to_excel("data/Medium_List.xlsx", index=False)
333
+ os.chdir(data_path)
334
+ return df
335
+
336
+
337
+ def clean_artist_names(df: pd.DataFrame) -> pd.DataFrame:
338
+ for i in range(len(df)):
339
+ if type(df.loc[i, "Artist"]) != str:
340
+ df.loc[i, "Artist (Auto)"] = ""
341
+ else:
342
+ df.loc[i, "Artist (Auto)"] = df.loc[i, "Artist"].split('(')[0].strip()
343
+ df.loc[i, "Artist (Auto)"] = df.loc[i, "Artist (Auto)"].title()
344
+ return df
345
+
346
+
347
+ def fill_years_and_add_flags(df: pd.DataFrame, creation_terms: list, conceived_terms: list) -> pd.DataFrame:
348
+ for i in range(len(df)):
349
+ try:
350
+ df.iloc[i] = fill_auto_years(df.iloc[i], creation_terms, conceived_terms)
351
+ except Exception as e:
352
+ print("Exception filling auto years: ", e)
353
+ print(i)
354
+ try:
355
+ df.iloc[i] = add_flags(df.iloc[i])
356
+ except Exception as e:
357
+ print("Exception adding flags: ", e)
358
+ print(i)
359
+ return df
360
+
361
+
362
+ def extract_from_scrapes(data_path: str, find_new_mediums: bool=False):
363
+ df = prepare_df(data_path)
364
+ print("Preparing new spreadsheet template")
365
+ df = extract_info(df, search_terms)
366
+ print("Extracting mediums")
367
+ df = extract_mediums(df, data_path, find_new=find_new_mediums)
368
+ print("Cleaning artist names")
369
+ df = clean_artist_names(df)
370
+ print("Calculating premiums")
371
+ df = add_premium_columns(df)
372
+ print("Extracting dates")
373
+ df = fill_years_and_add_flags(df, creation_terms, conceived_terms)
374
+ date_and_time = datetime.now().strftime('%d-%m-%Y %H.%M')
375
+ time_now = datetime.now().strftime('%H:%M')
376
+ df.to_excel(f"Mapped scrape with dates {date_and_time}.xlsx", index=False)
377
+ print(f"File written successfully at {time_now}")
378
+
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import re
4
+ from importlib import resources
5
+
6
+
7
+ def load_premium_data():
8
+ with resources.files("arttactic").joinpath("data/Buyer's Premium.xlsx").open("rb") as f:
9
+ return pd.read_excel(f)
10
+
11
+ def get_date_row(house, location, date):
12
+ premium_data = load_premium_data()
13
+ selected_rows = premium_data[(premium_data["Auction House"] == house) & (premium_data['Location'] == location)]
14
+ date = pd.to_datetime(date.date())
15
+ if selected_rows["Date Started"].dropna().max() < date:
16
+ # print('latest row')
17
+ date_row = selected_rows[selected_rows["Date Started"] == selected_rows["Date Started"].dropna().max()]
18
+
19
+ elif date < selected_rows["Date Ended"].dropna().min():
20
+ # print('earliest row')
21
+ date_row = selected_rows[selected_rows["Date Ended"] == selected_rows["Date Ended"].dropna().min()]
22
+
23
+ else:
24
+ date_row = selected_rows[(pd.to_datetime(selected_rows["Date Ended"]) > date) & (selected_rows["Date Started"] < date)]
25
+ return date_row
26
+
27
+ def find_tier(price, date_row):
28
+ tier_1_boundary = date_row["tier_1_boundary"].values[0]
29
+ tier_2_boundary = date_row["tier_2_boundary"].values[0]
30
+ tier_3_boundary = date_row["tier_3_boundary"].values[0]
31
+
32
+ numerical_values = re.findall(r'\d+', str(price))
33
+
34
+ if len(numerical_values) > 0:
35
+ price = numerical_values[0]
36
+ if int(price) <= tier_1_boundary:
37
+ return 1
38
+ elif (int(price) > tier_1_boundary) & (int(price) <= tier_2_boundary):
39
+ return 2
40
+ elif (int(price) > tier_3_boundary):
41
+ if pd.isna(tier_3_boundary):
42
+ return 3
43
+ elif (int(price) <= tier_3_boundary):
44
+ return 3
45
+ elif (int(price) > tier_3_boundary):
46
+ return 4
47
+
48
+
49
+ def calculate_premium(row, date_row):
50
+ tier_1_boundary = date_row["tier_1_boundary"].values[0]
51
+ tier_2_boundary = date_row["tier_2_boundary"].values[0]
52
+ tier_3_boundary = date_row["tier_3_boundary"].values[0]
53
+
54
+ if type(row["Tier (Auto)"]) == float and row["Tier (Auto)"] > 0:
55
+ numerical_values = re.findall(r'\d+', str(row["Aggregate Price (OC)"]))
56
+ price = float(numerical_values[0])
57
+ # print(row["Aggregate Price (OC)"])
58
+ if row["Tier (Auto)"] == 1:
59
+ return float(price*date_row["Tier 1 %"])/float(1 + date_row["Tier 1 %"])
60
+ elif row["Tier (Auto)"] == 2:
61
+ tier1_premium = (tier_1_boundary*date_row["Tier 1 %"])/(1 + date_row["Tier 1 %"])
62
+ tier2_portion = price - tier_1_boundary
63
+ tier2_premium = (tier2_portion*date_row["Tier 2 %"])/(1 + date_row["Tier 2 %"])
64
+ return np.round(float(tier1_premium+tier2_premium), 2)
65
+ elif row["Tier (Auto)"] == 3:
66
+ tier1_premium = (tier_1_boundary*date_row["Tier 1 %"])/(1 + date_row["Tier 1 %"])
67
+ tier2_portion = tier_2_boundary - tier_1_boundary
68
+ tier2_premium = (tier2_portion*date_row["Tier 2 %"])/(1 + date_row["Tier 2 %"])
69
+ tier3_portion = price - tier_2_boundary
70
+ tier3_premium = (tier3_portion*date_row["Tier 3 %"])/(1 + date_row["Tier 3 %"])
71
+ return np.round(float(tier1_premium+tier2_premium+tier3_premium), 2)
72
+ elif row["Tier (Auto)"] == 4:
73
+ tier1_premium = (tier_1_boundary*date_row["Tier 1 %"])/(1 + date_row["Tier 1 %"])
74
+ tier2_portion = tier_2_boundary - tier_1_boundary
75
+ tier2_premium = (tier2_portion*date_row["Tier 2 %"])/(1 + date_row["Tier 2 %"])
76
+ tier3_portion = tier_3_boundary - tier_2_boundary
77
+ tier3_premium = (tier3_portion*date_row["Tier 3 %"])/(1 + date_row["Tier 3 %"])
78
+ tier4_portion = price - tier_3_boundary
79
+ tier4_premium = (tier4_portion*date_row["Tier 4 %"])/(1 + date_row["Tier 4 %"])
80
+ return np.round(float(tier1_premium+tier2_premium+tier3_premium+tier4_premium), 2)
81
+
82
+
83
+ def calculate_hammer_price(row):
84
+ if type(row["Tier (Auto)"]) == float and row["Tier (Auto)"] > 0:
85
+ numerical_values = re.findall(r'\d+', str(row["Aggregate Price (OC)"]))
86
+ price = float(numerical_values[0])
87
+ return (price - row["Premium (Auto)"])
88
+
89
+
90
+ def add_premium_columns(df):
91
+ auction_dict = dict(list(df.groupby(["Auction House", "Location", "Sale Date"])))
92
+ for key, value in auction_dict.items():
93
+ house = key[0]
94
+ location = key[1]
95
+ date = key[2]
96
+
97
+ if location == "York":
98
+ location = "New York"
99
+ value["Location"] = "New York"
100
+ if location == "Kong":
101
+ location = "Hong Kong"
102
+ value["Location"] = "Hong Kong"
103
+ try:
104
+ date = pd.to_datetime(date)
105
+ except:
106
+ individual_dates = [x for x in date.split('-')]
107
+ dates = []
108
+ for d in individual_dates:
109
+ try:
110
+ d = pd.to_datetime(d)
111
+ dates.append(d)
112
+ except:
113
+ pass
114
+ date = max(dates)
115
+
116
+
117
+ date_row = get_date_row(house, location, date)
118
+
119
+ if len(date_row) == 0:
120
+ value["Tier (Auto)"] = ""
121
+ value["Premium (Auto)"] = ""
122
+ value["Hammer Price (Auto)"] = ""
123
+ else:
124
+ value["Tier (Auto)"] = value.apply(lambda row : find_tier(row["Aggregate Price (OC)"], date_row), axis=1)
125
+ value["Premium (Auto)"] = value.apply(lambda row : calculate_premium(row, date_row), axis=1)
126
+ # print(value["Aggregate Price (OC)"])
127
+ value["Hammer Price (Auto)"] = value.apply(lambda row : calculate_hammer_price(row), axis=1)
128
+ return pd.concat(auction_dict.values())
129
+
130
+ def add_premium_columns_only(data_path: str):
131
+ existing_data = pd.read_excel(data_path)
132
+ data_with_premium_columns = add_premium_columns(existing_data)
133
+ data_with_premium_columns.to_excel(data_path, index=False, encoding='utf-8')
@@ -0,0 +1,79 @@
1
+ import tkinter as tk
2
+ from tkinter import filedialog, messagebox
3
+ import warnings
4
+ import threading
5
+
6
+ from arttactic.mixed_scrapes import extract_from_scrapes
7
+ from arttactic.premiums import add_premium_columns_only
8
+
9
+
10
+ class ProcessScrapes(object):
11
+ def __init__(self, root):
12
+ self.root = root
13
+ self.data_directory = None
14
+ self.existing_file = None
15
+
16
+ def choose_dir(self):
17
+ self.data_directory = filedialog.askdirectory()
18
+
19
+ def choose_file(self):
20
+ self.existing_file = filedialog.askopenfilename()
21
+
22
+ def create_spreadsheet(self):
23
+ if self.data_directory is not None:
24
+ threading.Thread(
25
+ target=extract_from_scrapes,
26
+ args=(self.data_directory,),
27
+ daemon=True
28
+ ).start()
29
+ else:
30
+ messagebox.showwarning(title=None, message="No directory has been selected.")
31
+
32
+ def create_spreadsheet_find_new_mediums(self):
33
+ if self.data_directory is not None:
34
+ threading.Thread(
35
+ target=extract_from_scrapes,
36
+ args=(self.data_directory,),
37
+ kwargs={"find_new_mediums": True},
38
+ daemon=True
39
+ ).start()
40
+ else:
41
+ messagebox.showwarning(title=None, message="No directory has been selected.")
42
+
43
+ def create_spreadsheet_premiums_only(self):
44
+ if self.existing_file is not None:
45
+ threading.Thread(
46
+ target=add_premium_columns_only,
47
+ args=(self.existing_file,),
48
+ daemon=True
49
+ ).start()
50
+ else:
51
+ messagebox.showwarning(title=None, message="No file has been selected.")
52
+
53
+ def pack_buttons(self):
54
+ button1 = tk.Button(self.root, text='Select directory', command=self.choose_dir, fg='#F66441')
55
+ button1.pack(side='top', pady=(40,4))
56
+ button2 = tk.Button(self.root, text='Select file', command=self.choose_file, fg='#4D8FFB')
57
+ button2.pack(side='top', pady=(4,40))
58
+ button3 = tk.Button(self.root, text='Create spreadsheet', command=self.create_spreadsheet, fg='#F66441')
59
+ button3.pack(side='top', pady=4)
60
+ button4 = tk.Button(self.root, text='Create spreadsheet (find new mediums)', command=self.create_spreadsheet_find_new_mediums, fg='#F66441')
61
+ button4.pack(side='top', pady=4)
62
+ button5 = tk.Button(self.root, text='Add premiums to existing file', command=self.create_spreadsheet_premiums_only, fg='#4D8FFB')
63
+ button5.pack(side='top', pady=4)
64
+
65
+
66
+ def main():
67
+ warnings.filterwarnings("ignore")
68
+ root = tk.Tk()
69
+ # img = tk.Image("photo", file="icon.png")
70
+ # root.iconphoto(True, img)
71
+ root.title("ArtTactic Scrape Processor")
72
+ root.geometry('400x300')
73
+ root.resizable(0, 0)
74
+ instance = ProcessScrapes(root)
75
+ instance.pack_buttons()
76
+ root.mainloop()
77
+
78
+ if __name__ == "__main__":
79
+ main()
File without changes
@@ -0,0 +1,47 @@
1
import glob
import os

import numpy as np
import pandas as pd
4
+
5
+
6
+
7
def find_and_concatenate_csvs(directory='.'):
    """Collect every .csv file in `directory` and concatenate them into two
    DataFrames, split by scrape format.

    Files containing an "artist_dates" column are treated as the old scrape
    format; everything else is the new format. The default directory is the
    current working directory, matching the original (parameterless) behaviour.

    Args:
        directory: folder to scan for *.csv files.

    Returns:
        tuple: (old_scrapes_df, new_scrapes_df); either may be an empty
        DataFrame when no file of that format is present.
    """
    csv_files = glob.glob(os.path.join(directory, '*.csv'))
    old_scrapes = []
    new_scrapes = []
    # Separate the old and new scrapes by schema
    for path in csv_files:
        df = pd.read_csv(path, encoding='utf8')
        if "artist_dates" in df.columns:
            old_scrapes.append(df)
        else:
            new_scrapes.append(df)
    # Concatenate each group, falling back to an empty frame
    old_scrapes_df = pd.concat(old_scrapes, ignore_index=True) if old_scrapes else pd.DataFrame()
    new_scrapes_df = pd.concat(new_scrapes, ignore_index=True) if new_scrapes else pd.DataFrame()
    return old_scrapes_df, new_scrapes_df
29
+
30
+
31
# Deprecated
def set_column_types(df):
    """Deprecated. Force the Artist_dates column to string, restoring real
    NaN for values that stringify as '<NA>'.

    Returns the same DataFrame with the column normalised.
    """
    as_text = df['Artist_dates'].astype(str)
    # '<NA>' is the string form of a pandas missing value; put NaN back so
    # later merges treat it as missing.
    df["Artist_dates"] = as_text.replace('<NA>', np.nan)
    return df
37
+
38
+
39
def add_auto_columns(df):
    """Append the empty "(Auto)" output columns that the extraction pipeline
    fills in later.

    List-valued columns get a fresh list per row (so per-row appends never
    leak across rows); the rest start as all-None object Series so any value
    type can be written later.
    """
    n = len(df)
    auto_columns = [
        ("Lot Notes (Auto)", list),
        ("Creation Year (Auto)", None),
        ("Conceived Year (Auto)", None),
        ("Medium (Auto)", list),
        ("Info (Auto)", list),
        ("Flags (Auto)", None),
        ("Artist (Auto)", None),
    ]
    for name, kind in auto_columns:
        if kind is list:
            df[name] = [[] for _ in range(n)]
        else:
            df[name] = pd.Series([None] * n, dtype=object)
    return df
@@ -0,0 +1,119 @@
1
+ import pandas as pd
2
+ import re
3
+
4
import os
# NOTE(review): a debug `print(os.getcwd())` that ran on every import of this
# module was removed here — it polluted stdout for every consumer of the
# library. `os` itself is kept in case other code in this file still uses it;
# confirm and drop if unused.
6
+
7
+ from arttactic.variables.reg_exs import bd_year, yy, yyyy, yyyys, yyyy_yy, yyyy_yyyy, yyyys_yyyys
8
+
9
+
10
+
11
def clean_string(string_to_clean: str) -> list:
    """Lower-case a scraped text fragment, strip punctuation/markup noise,
    and split it into whitespace-delimited words.

    Returns:
        list: the cleaned tokens.
    """
    text = str(string_to_clean).lower()
    # Substrings deleted outright: punctuation, curly/straight quotes,
    # brackets, the "xad" residue of soft-hyphen escapes, backslashes,
    # and simple HTML tags.
    for token in (",", "’", "‘", "'", '"', ";", "[", "]", "xad", "\\", "<br>", "<i>", "</i>"):
        text = text.replace(token, "")
    # Full stops become spaces so abutting sentences still split into words.
    text = text.replace(".", " ")
    return text.split()
28
+
29
+
30
def clean_birth_years(artist_dates) -> int:
    """Extract the first 4-digit year from an artist-dates string.

    Args:
        artist_dates: free-text dates string (e.g. scraped "1901-1950").

    Returns:
        int: the first 4-digit year found, or NaN when the input has no
        such year or is not a string (e.g. a missing/NaN cell).
    """
    try:
        return int(re.findall(bd_year, artist_dates)[0])
    # Narrowed from a bare `except:`: IndexError = no year found,
    # TypeError = non-string input (e.g. NaN).
    except (IndexError, TypeError):
        return float("NaN")
35
+
36
+
37
def clean_death_years(artist_dates) -> int:
    """Extract the death year (the second 4-digit year) from an
    artist-dates string.

    Args:
        artist_dates: free-text dates string (e.g. scraped "1901-1950").

    Returns:
        int: the second 4-digit year when exactly two are present, else NaN.
        (The original implicitly returned None when fewer/more than two
        years were found; NaN is returned instead for consistency with
        clean_birth_years — both read as missing in pandas.)
    """
    try:
        dates = re.findall(bd_year, artist_dates)
    # Narrowed from a bare `except:`: TypeError = non-string input (e.g. NaN).
    except TypeError:
        return float("NaN")
    if len(dates) == 2:
        return int(dates[1])
    return float("NaN")
44
+
45
+
46
def return_substrings(lot_notes_split: list, search_terms: list) -> list:
    """For every occurrence of each search term in the word list, capture a
    fixed-length window of words starting at the term.

    Args:
        lot_notes_split: lot-note text already split into words.
        search_terms: terms to look for (exact word matches).

    Returns:
        list: one word-list window per occurrence of each term.
    """
    # Window sizes per term; "cast" notes tend to run long, "enlarged" medium.
    window_sizes = {"cast": 14, "enlarged": 8}
    substrings = []
    for term in search_terms:
        limit = window_sizes.get(term, 6)
        for position, word in enumerate(lot_notes_split):
            if word == term:
                substrings.append(list(lot_notes_split[position:position + limit]))
    return substrings
62
+
63
+
64
def trim_substrings(substrings: list) -> list:
    """Trim each substring so it ends at a word containing the last
    two-digit group found anywhere in the substring.

    Substrings containing no two-digit group are dropped — the original did
    this implicitly by letting a bare `except:` swallow the IndexError from
    `b[-1]`; here the empty case is handled explicitly so other errors are
    no longer hidden.

    Args:
        substrings: lists of words, e.g. output of return_substrings().

    Returns:
        list: trimmed substrings; one entry per word containing the final
        digit pair (duplicates possible, matching the original behaviour).
    """
    new_substrings = []
    for substring in substrings:
        digit_pairs = re.findall(r'\d{2}', ' '.join(substring))
        if not digit_pairs:
            # Nothing that looks like a year fragment: drop this substring.
            continue
        last_pair = digit_pairs[-1]
        for word in substring:
            if last_pair in word:
                # .index() finds the first occurrence of this word value,
                # exactly as the original did.
                new_substrings.append(substring[:substring.index(word) + 1])
    return new_substrings
76
+
77
+
78
def clean_short_years(row: pd.Series) -> list:
    """Expand two-digit creation years in a row's "Info (Auto)" substrings
    to four digits, using the artist's birth year to pick the century, and
    keep only substrings ending in a recognised year form.

    Args:
        row: a row with "Info (Auto)" (lists of words, each ending in a
            candidate year token) and "Born" (birth year; presumably an
            int — TODO confirm against the upstream extraction).

    Returns:
        list: the surviving substrings, each joined into a single string.

    Raises:
        Exception: when the "Born" value cannot be read from the row.
    """
    new_substrings = row["Info (Auto)"]
    birth_year = 0
    filtered_substrings = []

    for substring in new_substrings:
        # Final token is exactly two digits (e.g. "executed in 52") and the
        # note is not about a label — label dates are not creation dates.
        if yy.fullmatch(substring[-1]) and "label" not in substring:
            short_creation_year = int(substring[-1])
            try:
                birth_year = (row["Born"])
            except:
                raise Exception("no birth year")
            if birth_year > 0:
                # First two digits of the birth year, e.g. 19 for 1950.
                century = int(str(birth_year)[:2])

                try:
                    if short_creation_year > int(str(birth_year)[2:]):
                        # Same century as the birth year: 1950-born, "72" -> "1972".
                        substring[-1] = (f"{century}{short_creation_year}")
                    elif short_creation_year < int(str(birth_year)[2:]):
                        # Smaller than the birth-year suffix: assume the next
                        # century (1950-born, "05" -> "2005").
                        century = century + 1
                        if len(str(short_creation_year)) == 2:
                            substring[-1] = (f"{century}{short_creation_year}")
                        elif len(str(short_creation_year)) == 1:
                            # int("05") == 5, so re-pad to two digits.
                            substring[-1] = (f"{century}0{short_creation_year}")
                except:
                    # Malformed birth year: leave the token unmodified.
                    pass
        # Keep only substrings whose final token now looks like a full year,
        # a decade ("1950s"), or a year range.
        if (yyyy.fullmatch(substring[-1])) or (yyyys.fullmatch(substring[-1])) or (yyyy_yyyy.fullmatch(substring[-1])) or (yyyy_yy.fullmatch(substring[-1])) or (yyyys_yyyys.fullmatch(substring[-1])):
            filtered_substrings.append(' '.join(substring))
    return filtered_substrings
107
+
108
+
109
def find_term_in_line(term, line):
    """Return `line` if it contains `term` as a whole word, else None.

    The term is regex-escaped, so punctuation in it is matched literally.
    """
    pattern = r'\b' + re.escape(term) + r'\b'
    return line if re.search(pattern, line) else None
112
+
113
def remove_duplicate_mediums(raw_medium_output: list):
    """Drop medium strings that are substrings of longer entries,
    e.g. "oil" when "oil on canvas" is also present.

    Args:
        raw_medium_output: candidate medium strings.

    Returns:
        list: surviving mediums, shortest first (the original length order).

    Note: the original sorted the caller's list in place — a surprising
    side effect. A stable sorted copy is used instead, so the returned
    value is identical but the argument is left untouched.
    """
    ordered = sorted(raw_medium_output, key=len)
    cleaned_medium_output = []
    for i, word in enumerate(ordered):
        # Keep a medium only if it does not occur inside the text formed by
        # the longer entries still to come.
        if word not in " ".join(ordered[i + 1:]):
            cleaned_medium_output.append(word)
    return cleaned_medium_output
File without changes
@@ -0,0 +1,16 @@
1
+ import re
2
+
3
+
4
# Any 4-digit run; used with re.findall to pull birth/death years out of an
# artist-dates string.
bd_year = re.compile(r'\d{4}')

# Exactly two digits when used with .fullmatch(), e.g. a short year "52".
yy = re.compile(r'\d{2}')

# Exactly four digits when used with .fullmatch(), e.g. "1952".
yyyy = re.compile(r'\d{4}')

# A decade, e.g. "1950s".
yyyys = re.compile(r'[1-2][0-9][0-9][0-9][s]')

# Year range with a two-digit end, e.g. "1950-52".
yyyy_yy = re.compile(r'[1-2][0-9][0-9][0-9]-[0-9][0-9]')

# Year range with a four-digit end, e.g. "1950-1952".
yyyy_yyyy = re.compile(r'[1-2][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]')

# Decade range, e.g. "1950s-1960s".
yyyys_yyyys = re.compile(r'[1-2][0-9][0-9][0-9][s]-[1-2][0-9][0-9][0-9][s]')
@@ -0,0 +1,43 @@
1
+
2
# Terms indicating when a work was physically made/executed.
creation_terms = [
    "drawn",
    "enlarged",
    "dated",
    "executed",
    "painted",
    "photographed",
    "printed",
    "manufactured",
    "produced",
    "begun",
    "cast",
    "made",
    "created",
    "published",
    "screenprint in colors",
]

# Terms indicating when a work was conceived/designed rather than made.
conceived_terms = [
    "conceived",
    "designed",
]

# All terms scanned for in lot notes. Previously this list repeated every
# entry of the two lists above verbatim; deriving it keeps the three lists
# in sync (same elements, same order as before).
search_terms = creation_terms + conceived_terms
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: arttactic
3
+ Version: 0.1.1
4
+ Summary: Processing library for art auction data
5
+ Author: Nafisa Sharif
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Nafisa Sharif
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ Classifier: Programming Language :: Python :: 3
28
+ Classifier: License :: OSI Approved :: MIT License
29
+ Classifier: Operating System :: OS Independent
30
+ Requires-Python: >=3.8
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE
33
+ Requires-Dist: pandas>=3.0.1
34
+ Requires-Dist: openpyxl>=3.1.5
35
+ Dynamic: license-file
36
+
37
+ # arttactic
38
+
39
+ A Python library for extracting and processing data from scraped art auction results. Currently designed to run on Linux or macOS, not yet tested on Windows.
40
+
41
+
42
+ ## Description
43
+
44
+ `arttactic` provides tools for working with art auction datasets, including:
45
+
46
+ * Cleaning and transforming auction data
47
+ * Applying buyer’s premium calculations
48
+ * Extracting mediums from lot information
49
+
50
+
51
+ ## Installation
52
+
53
+ (Recommended) In the terminal, create a virtual environment and activate it.
54
+
55
+ For example:
56
+
57
+ ```bash
58
+ python3 -m venv arttactic-venv
59
+ source arttactic-venv/bin/activate
60
+ ```
61
+
62
+ Then install the arttactic library:
63
+
64
+ ```bash
65
+ pip install arttactic
66
+ ```
67
+
68
+ ## Usage
69
+
70
+ From the terminal, run the following CLI command:
71
+
72
+ ```bash
73
+ run-arttactic
74
+ ```
75
+
76
+ This will launch a GUI that allows you to:
77
+ - Select a directory containing .csv files with scraped auction data,
78
+ - Select a single file you wish to add buyer's premiums to,
79
+ - Generate processed spreadsheets after selecting a directory,
80
+ - Generate processed spreadsheets after selecting a directory, and also find and save new mediums,
81
+ - Add buyer's premiums to a selected file.
82
+
83
+
84
+ ## Development
85
+
86
+ (Only relevant if you wish to edit the source code.)
87
+ To install locally in editable mode:
88
+
89
+ ```bash
90
+ pip install -e .
91
+ ```
@@ -0,0 +1,23 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ arttactic/__init__.py
5
+ arttactic/extraction.py
6
+ arttactic/mixed_scrapes.py
7
+ arttactic/premiums.py
8
+ arttactic/run.py
9
+ arttactic.egg-info/PKG-INFO
10
+ arttactic.egg-info/SOURCES.txt
11
+ arttactic.egg-info/dependency_links.txt
12
+ arttactic.egg-info/entry_points.txt
13
+ arttactic.egg-info/requires.txt
14
+ arttactic.egg-info/top_level.txt
15
+ arttactic/data/Buyer's Premium.xlsx
16
+ arttactic/data/Medium_List.xlsx
17
+ arttactic/data/__init__.py
18
+ arttactic/utils/__init__.py
19
+ arttactic/utils/df_utils.py
20
+ arttactic/utils/string_utils.py
21
+ arttactic/variables/__init__.py
22
+ arttactic/variables/reg_exs.py
23
+ arttactic/variables/search_lists.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ run-arttactic = arttactic.run:main
@@ -0,0 +1,2 @@
1
+ pandas>=3.0.1
2
+ openpyxl>=3.1.5
@@ -0,0 +1,3 @@
1
+ arttactic
2
+ dist
3
+ test_env
@@ -0,0 +1,35 @@
1
[build-system]
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "arttactic"
version = "0.1.1"
description = "Processing library for art auction data"
readme = { file = "README.md", content-type = "text/markdown" }
# NOTE(review): pandas>=3.0.1 is not installable on Python 3.8; either relax
# the pandas pin or raise requires-python — confirm against pandas' release notes.
requires-python = ">=3.8"
authors = [
    { name = "Nafisa Sharif" },
]
license = { file = "LICENSE" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "pandas>=3.0.1",
    "openpyxl>=3.1.5",
]

[project.scripts]
run-arttactic = "arttactic.run:main"

[tool.setuptools]
include-package-data = true

# Ship the bundled Excel data files inside the package.
[tool.setuptools.package-data]
arttactic = ["data/*.xlsx"]

[tool.setuptools.packages.find]
where = ["."]
# Restrict discovery to the real package: without this filter, stray
# top-level directories such as dist/ and test_env/ were picked up as
# packages (they appear in the built distribution's top_level.txt).
include = ["arttactic*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+