google-meridian 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {google_meridian-1.1.3.dist-info → google_meridian-1.1.5.dist-info}/METADATA +2 -2
- {google_meridian-1.1.3.dist-info → google_meridian-1.1.5.dist-info}/RECORD +15 -15
- meridian/analysis/analyzer.py +18 -11
- meridian/analysis/optimizer.py +292 -47
- meridian/constants.py +6 -4
- meridian/data/data_frame_input_data_builder.py +222 -61
- meridian/data/input_data_builder.py +3 -1
- meridian/data/load.py +210 -350
- meridian/model/model.py +3 -10
- meridian/model/prior_distribution.py +7 -4
- meridian/model/prior_sampler.py +2 -0
- meridian/version.py +1 -1
- {google_meridian-1.1.3.dist-info → google_meridian-1.1.5.dist-info}/WHEEL +0 -0
- {google_meridian-1.1.3.dist-info → google_meridian-1.1.5.dist-info}/licenses/LICENSE +0 -0
- {google_meridian-1.1.3.dist-info → google_meridian-1.1.5.dist-info}/top_level.txt +0 -0
|
@@ -30,25 +30,112 @@ __all__ = [
|
|
|
30
30
|
class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
31
31
|
"""Builds `InputData` from DataFrames."""
|
|
32
32
|
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
kpi_type: str,
|
|
36
|
+
default_geo_column: str = constants.GEO,
|
|
37
|
+
default_time_column: str = constants.TIME,
|
|
38
|
+
default_media_time_column: str = constants.TIME,
|
|
39
|
+
default_population_column: str = constants.POPULATION,
|
|
40
|
+
default_kpi_column: str = constants.KPI,
|
|
41
|
+
default_revenue_per_kpi_column: str = constants.REVENUE_PER_KPI,
|
|
42
|
+
):
|
|
43
|
+
super().__init__(kpi_type)
|
|
44
|
+
|
|
45
|
+
self._default_geo_column = default_geo_column
|
|
46
|
+
self._default_time_column = default_time_column
|
|
47
|
+
self._default_media_time_column = default_media_time_column
|
|
48
|
+
self._default_population_column = default_population_column
|
|
49
|
+
self._default_kpi_column = default_kpi_column
|
|
50
|
+
self._default_revenue_per_kpi_column = default_revenue_per_kpi_column
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def default_geo_column(self) -> str:
|
|
54
|
+
"""The default geo column name for this builder to use.
|
|
55
|
+
|
|
56
|
+
This column name is used when `geo_col` is not explicitly provided to a data
|
|
57
|
+
setter method.
|
|
58
|
+
|
|
59
|
+
By default, this is `"geo"`.
|
|
60
|
+
"""
|
|
61
|
+
return self._default_geo_column
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
def default_time_column(self) -> str:
|
|
65
|
+
"""The default time column name for this builder to use.
|
|
66
|
+
|
|
67
|
+
This column name is used when `time_col` is not explicitly provided to a
|
|
68
|
+
data setter method.
|
|
69
|
+
|
|
70
|
+
By default, this is `"time"`.
|
|
71
|
+
"""
|
|
72
|
+
return self._default_time_column
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def default_media_time_column(self) -> str:
|
|
76
|
+
"""The default *media* time column name for this builder to use.
|
|
77
|
+
|
|
78
|
+
This column name is used when `media_time_col` is not explicitly provided to
|
|
79
|
+
a data setter method.
|
|
80
|
+
|
|
81
|
+
By default, this is also `"time"`, since most input dataframes are likely
|
|
82
|
+
to use the same time column for both their media execution and media spend
|
|
83
|
+
data.
|
|
84
|
+
"""
|
|
85
|
+
return self._default_media_time_column
|
|
86
|
+
|
|
87
|
+
@property
|
|
88
|
+
def default_population_column(self) -> str:
|
|
89
|
+
"""The default population column name for this builder to use.
|
|
90
|
+
|
|
91
|
+
This column name is used when `population_col` is not explicitly provided to
|
|
92
|
+
a data setter method.
|
|
93
|
+
|
|
94
|
+
By default, this is `"population"`.
|
|
95
|
+
"""
|
|
96
|
+
return self._default_population_column
|
|
97
|
+
|
|
98
|
+
@property
|
|
99
|
+
def default_kpi_column(self) -> str:
|
|
100
|
+
"""The default kpi column name for this builder to use.
|
|
101
|
+
|
|
102
|
+
This column name is used when `kpi_col` is not explicitly provided to a data
|
|
103
|
+
setter method.
|
|
104
|
+
|
|
105
|
+
By default, this is `"kpi"`.
|
|
106
|
+
"""
|
|
107
|
+
return self._default_kpi_column
|
|
108
|
+
|
|
109
|
+
@property
|
|
110
|
+
def default_revenue_per_kpi_column(self) -> str:
|
|
111
|
+
"""The default revenue per kpi column name for this builder to use.
|
|
112
|
+
|
|
113
|
+
This column name is used when `revenue_per_kpi_col` is not explicitly
|
|
114
|
+
provided to a data setter method.
|
|
115
|
+
|
|
116
|
+
By default, this is `"revenue_per_kpi"`.
|
|
117
|
+
"""
|
|
118
|
+
return self._default_revenue_per_kpi_column
|
|
119
|
+
|
|
33
120
|
def with_kpi(
|
|
34
121
|
self,
|
|
35
122
|
df: pd.DataFrame,
|
|
36
|
-
kpi_col: str =
|
|
37
|
-
time_col: str =
|
|
38
|
-
geo_col: str =
|
|
123
|
+
kpi_col: str | None = None,
|
|
124
|
+
time_col: str | None = None,
|
|
125
|
+
geo_col: str | None = None,
|
|
39
126
|
) -> 'DataFrameInputDataBuilder':
|
|
40
127
|
"""Reads KPI data from a DataFrame.
|
|
41
128
|
|
|
42
129
|
Args:
|
|
43
130
|
df: The DataFrame to read the KPI data from.
|
|
44
131
|
kpi_col: The name of the column containing the KPI values. If not
|
|
45
|
-
provided,
|
|
132
|
+
provided, `self.default_kpi_column` is used.
|
|
46
133
|
time_col: The name of the column containing the time coordinates. If not
|
|
47
|
-
provided,
|
|
134
|
+
provided, `self.default_time_column` is used.
|
|
48
135
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
49
|
-
If not provided,
|
|
50
|
-
has no geo column, a national model data is assumed and a geo
|
|
51
|
-
will be created internally with a single coordinate value
|
|
136
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
137
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
138
|
+
dimension will be created internally with a single coordinate value
|
|
52
139
|
`national_geo`.
|
|
53
140
|
|
|
54
141
|
Returns:
|
|
@@ -56,6 +143,10 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
56
143
|
"""
|
|
57
144
|
kpi_df = df.copy()
|
|
58
145
|
|
|
146
|
+
kpi_col = kpi_col or self.default_kpi_column
|
|
147
|
+
time_col = time_col or self.default_time_column
|
|
148
|
+
geo_col = geo_col or self.default_geo_column
|
|
149
|
+
|
|
59
150
|
### Validate ###
|
|
60
151
|
self._validate_cols(kpi_df, [kpi_col, time_col], [geo_col])
|
|
61
152
|
self._validate_coords(kpi_df, geo_col, time_col)
|
|
@@ -73,8 +164,8 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
73
164
|
self,
|
|
74
165
|
df: pd.DataFrame,
|
|
75
166
|
control_cols: list[str],
|
|
76
|
-
time_col: str =
|
|
77
|
-
geo_col: str =
|
|
167
|
+
time_col: str | None = None,
|
|
168
|
+
geo_col: str | None = None,
|
|
78
169
|
) -> 'DataFrameInputDataBuilder':
|
|
79
170
|
"""Reads controls data from a DataFrame.
|
|
80
171
|
|
|
@@ -82,18 +173,25 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
82
173
|
df: The DataFrame to read the controls data from.
|
|
83
174
|
control_cols: The names of the columns containing the controls values.
|
|
84
175
|
time_col: The name of the column containing the time coordinates. If not
|
|
85
|
-
provided,
|
|
176
|
+
provided, `self.default_time_column` is used.
|
|
86
177
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
87
|
-
If not provided,
|
|
88
|
-
has no geo column, a national model data is assumed and a geo
|
|
89
|
-
will be created internally with a single coordinate value
|
|
178
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
179
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
180
|
+
dimension will be created internally with a single coordinate value
|
|
90
181
|
`national_geo`.
|
|
91
182
|
|
|
92
183
|
Returns:
|
|
93
184
|
The `DataFrameInputDataBuilder` with the added controls data.
|
|
94
185
|
"""
|
|
186
|
+
if not control_cols:
|
|
187
|
+
warnings.warn('No control columns provided. Not adding controls data.')
|
|
188
|
+
return self
|
|
189
|
+
|
|
95
190
|
controls_df = df.copy()
|
|
96
191
|
|
|
192
|
+
time_col = time_col or self.default_time_column
|
|
193
|
+
geo_col = geo_col or self.default_geo_column
|
|
194
|
+
|
|
97
195
|
### Validate ###
|
|
98
196
|
self._validate_cols(
|
|
99
197
|
controls_df,
|
|
@@ -116,19 +214,19 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
116
214
|
def with_population(
|
|
117
215
|
self,
|
|
118
216
|
df: pd.DataFrame,
|
|
119
|
-
population_col: str =
|
|
120
|
-
geo_col: str =
|
|
217
|
+
population_col: str | None = None,
|
|
218
|
+
geo_col: str | None = None,
|
|
121
219
|
) -> 'DataFrameInputDataBuilder':
|
|
122
220
|
"""Reads population data from a DataFrame.
|
|
123
221
|
|
|
124
222
|
Args:
|
|
125
223
|
df: The DataFrame to read the population data from.
|
|
126
224
|
population_col: The name of the column containing the population values.
|
|
127
|
-
If not provided,
|
|
225
|
+
If not provided, `self.default_population_column` is used.
|
|
128
226
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
129
|
-
If not provided,
|
|
130
|
-
has no geo column, a national model data is assumed and a geo
|
|
131
|
-
will be created internally with a single coordinate value
|
|
227
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
228
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
229
|
+
dimension will be created internally with a single coordinate value
|
|
132
230
|
`national_geo`.
|
|
133
231
|
|
|
134
232
|
Returns:
|
|
@@ -136,6 +234,9 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
136
234
|
"""
|
|
137
235
|
population_df = df.copy()
|
|
138
236
|
|
|
237
|
+
population_col = population_col or self.default_population_column
|
|
238
|
+
geo_col = geo_col or self.default_geo_column
|
|
239
|
+
|
|
139
240
|
### Validate ###
|
|
140
241
|
self._validate_cols(population_df, [population_col], [geo_col])
|
|
141
242
|
self._validate_coords(population_df, geo_col)
|
|
@@ -157,22 +258,22 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
157
258
|
def with_revenue_per_kpi(
|
|
158
259
|
self,
|
|
159
260
|
df: pd.DataFrame,
|
|
160
|
-
revenue_per_kpi_col: str =
|
|
161
|
-
time_col: str =
|
|
162
|
-
geo_col: str =
|
|
261
|
+
revenue_per_kpi_col: str | None = None,
|
|
262
|
+
time_col: str | None = None,
|
|
263
|
+
geo_col: str | None = None,
|
|
163
264
|
) -> 'DataFrameInputDataBuilder':
|
|
164
265
|
"""Reads revenue per KPI data from a DataFrame.
|
|
165
266
|
|
|
166
267
|
Args:
|
|
167
268
|
df: The DataFrame to read the revenue per KPI data from.
|
|
168
269
|
revenue_per_kpi_col: The name of the column containing the revenue per KPI
|
|
169
|
-
values. If not provided,
|
|
270
|
+
values. If not provided, `self.default_revenue_per_kpi_column` is used.
|
|
170
271
|
time_col: The name of the column containing the time coordinates. If not
|
|
171
|
-
provided,
|
|
272
|
+
provided, `self.default_time_column` is used.
|
|
172
273
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
173
|
-
If not provided,
|
|
174
|
-
has no geo column, a national model data is assumed and a geo
|
|
175
|
-
will be created internally with a single coordinate value
|
|
274
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
275
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
276
|
+
dimension will be created internally with a single coordinate value
|
|
176
277
|
`national_geo`.
|
|
177
278
|
|
|
178
279
|
Returns:
|
|
@@ -180,6 +281,12 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
180
281
|
"""
|
|
181
282
|
revenue_per_kpi_df = df.copy()
|
|
182
283
|
|
|
284
|
+
revenue_per_kpi_col = (
|
|
285
|
+
revenue_per_kpi_col or self.default_revenue_per_kpi_column
|
|
286
|
+
)
|
|
287
|
+
time_col = time_col or self.default_time_column
|
|
288
|
+
geo_col = geo_col or self.default_geo_column
|
|
289
|
+
|
|
183
290
|
### Validate ###
|
|
184
291
|
self._validate_cols(
|
|
185
292
|
revenue_per_kpi_df,
|
|
@@ -209,8 +316,8 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
209
316
|
media_cols: list[str],
|
|
210
317
|
media_spend_cols: list[str],
|
|
211
318
|
media_channels: list[str],
|
|
212
|
-
time_col: str =
|
|
213
|
-
geo_col: str =
|
|
319
|
+
time_col: str | None = None,
|
|
320
|
+
geo_col: str | None = None,
|
|
214
321
|
) -> 'DataFrameInputDataBuilder':
|
|
215
322
|
"""Reads media and media spend data from a DataFrame.
|
|
216
323
|
|
|
@@ -223,21 +330,31 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
223
330
|
`media_cols` and `media_spend_cols` in length. These are also index
|
|
224
331
|
mapped.
|
|
225
332
|
time_col: The name of the column containing the time coordinates for media
|
|
226
|
-
spend and media time coordinates for media. If not provided,
|
|
227
|
-
|
|
333
|
+
spend and media time coordinates for media. If not provided,
|
|
334
|
+
`self.default_time_column` is used. Media time coordinates are inferred
|
|
335
|
+
from the same `time_col` and are potentially shorter than time
|
|
228
336
|
coordinates if media spend values are missing (NaN) for some t in
|
|
229
337
|
`time`. Media time must be equal or a subset of time.
|
|
230
338
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
231
|
-
If not provided,
|
|
232
|
-
has no geo column, a national model data is assumed and a geo
|
|
233
|
-
will be created internally with a single coordinate value
|
|
339
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
340
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
341
|
+
dimension will be created internally with a single coordinate value
|
|
234
342
|
`national_geo`.
|
|
235
343
|
|
|
236
344
|
Returns:
|
|
237
345
|
The `DataFrameInputDataBuilder` with the added media and media spend data.
|
|
238
346
|
"""
|
|
347
|
+
if not media_cols or not media_spend_cols or not media_channels:
|
|
348
|
+
raise ValueError(
|
|
349
|
+
'`media_cols`, `media_spend_cols`, and `media_channels` must not be '
|
|
350
|
+
'empty.'
|
|
351
|
+
)
|
|
352
|
+
|
|
239
353
|
media_df = df.copy()
|
|
240
354
|
|
|
355
|
+
time_col = time_col or self.default_time_column
|
|
356
|
+
geo_col = geo_col or self.default_geo_column
|
|
357
|
+
|
|
241
358
|
### Validate ###
|
|
242
359
|
# For a media dataframe, media and media_spend columns may be the same
|
|
243
360
|
# (e.g. if using media spend as media execution value), so here we validate
|
|
@@ -280,8 +397,8 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
280
397
|
frequency_cols: list[str],
|
|
281
398
|
rf_spend_cols: list[str],
|
|
282
399
|
rf_channels: list[str],
|
|
283
|
-
time_col: str =
|
|
284
|
-
geo_col: str =
|
|
400
|
+
time_col: str | None = None,
|
|
401
|
+
geo_col: str | None = None,
|
|
285
402
|
) -> 'DataFrameInputDataBuilder':
|
|
286
403
|
"""Reads reach, frequency, and rf spend data from a DataFrame.
|
|
287
404
|
|
|
@@ -295,21 +412,36 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
295
412
|
also index mapped.
|
|
296
413
|
time_col: The name of the column containing the time coordinates for rf
|
|
297
414
|
spend and media time coordinates for reach and frequency. If not
|
|
298
|
-
provided,
|
|
299
|
-
|
|
300
|
-
|
|
415
|
+
provided, `self.default_time_column` is used. Media time coordinates are
|
|
416
|
+
inferred from the same `time_col` and are potentially shorter than time
|
|
417
|
+
coordinates if media spend values are missing (NaN) for some t in
|
|
418
|
+
`time`. Media time must be equal or a subset of time.
|
|
301
419
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
302
|
-
If not provided,
|
|
303
|
-
has no geo column, a national model data is assumed and a geo
|
|
304
|
-
will be created internally with a single coordinate value
|
|
420
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
421
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
422
|
+
dimension will be created internally with a single coordinate value
|
|
305
423
|
`national_geo`.
|
|
306
424
|
|
|
307
425
|
Returns:
|
|
308
426
|
The `DataFrameInputDataBuilder` with the added reach, frequency, and rf
|
|
309
427
|
spend data.
|
|
310
428
|
"""
|
|
429
|
+
if (
|
|
430
|
+
not reach_cols
|
|
431
|
+
or not frequency_cols
|
|
432
|
+
or not rf_spend_cols
|
|
433
|
+
or not rf_channels
|
|
434
|
+
):
|
|
435
|
+
raise ValueError(
|
|
436
|
+
'`reach_cols`, `frequency_cols`, `rf_spend_cols`, and `rf_channels` '
|
|
437
|
+
'must not be empty.'
|
|
438
|
+
)
|
|
439
|
+
|
|
311
440
|
reach_df = df.copy()
|
|
312
441
|
|
|
442
|
+
time_col = time_col or self.default_time_column
|
|
443
|
+
geo_col = geo_col or self.default_geo_column
|
|
444
|
+
|
|
313
445
|
### Validate ###
|
|
314
446
|
self._validate_cols(
|
|
315
447
|
reach_df,
|
|
@@ -368,8 +500,8 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
368
500
|
df: pd.DataFrame,
|
|
369
501
|
organic_media_cols: list[str],
|
|
370
502
|
organic_media_channels: list[str] | None = None,
|
|
371
|
-
media_time_col: str =
|
|
372
|
-
geo_col: str =
|
|
503
|
+
media_time_col: str | None = None,
|
|
504
|
+
geo_col: str | None = None,
|
|
373
505
|
) -> 'DataFrameInputDataBuilder':
|
|
374
506
|
"""Reads organic media data from a DataFrame.
|
|
375
507
|
|
|
@@ -382,18 +514,24 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
382
514
|
provided, must match `organic_media_cols` in length. This is index
|
|
383
515
|
mapped.
|
|
384
516
|
media_time_col: The name of the column containing the media time
|
|
385
|
-
coordinates. If not provided,
|
|
517
|
+
coordinates. If not provided, `self.default_media_time_column` is used.
|
|
386
518
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
387
|
-
If not provided,
|
|
388
|
-
has no geo column, a national model data is assumed and a geo
|
|
389
|
-
will be created internally with a single coordinate value
|
|
519
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
520
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
521
|
+
dimension will be created internally with a single coordinate value
|
|
390
522
|
`national_geo`.
|
|
391
523
|
|
|
392
524
|
Returns:
|
|
393
525
|
The `DataFrameInputDataBuilder` with the added organic media data.
|
|
394
526
|
"""
|
|
527
|
+
if not organic_media_cols:
|
|
528
|
+
raise ValueError('`organic_media_cols` must not be empty.')
|
|
529
|
+
|
|
395
530
|
organic_media_df = df.copy()
|
|
396
531
|
|
|
532
|
+
media_time_col = media_time_col or self.default_media_time_column
|
|
533
|
+
geo_col = geo_col or self.default_geo_column
|
|
534
|
+
|
|
397
535
|
### Validate ###
|
|
398
536
|
if not organic_media_channels:
|
|
399
537
|
organic_media_channels = organic_media_cols
|
|
@@ -432,8 +570,8 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
432
570
|
organic_reach_cols: list[str],
|
|
433
571
|
organic_frequency_cols: list[str],
|
|
434
572
|
organic_rf_channels: list[str],
|
|
435
|
-
media_time_col: str =
|
|
436
|
-
geo_col: str =
|
|
573
|
+
media_time_col: str | None = None,
|
|
574
|
+
geo_col: str | None = None,
|
|
437
575
|
) -> 'DataFrameInputDataBuilder':
|
|
438
576
|
"""Reads organic reach and organic frequency data from a DataFrame.
|
|
439
577
|
|
|
@@ -447,19 +585,32 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
447
585
|
match `organic_reach_cols` and `organic_frequency_cols` in length. These
|
|
448
586
|
are also index mapped.
|
|
449
587
|
media_time_col: The name of the column containing the media time
|
|
450
|
-
coordinates. If not provided,
|
|
588
|
+
coordinates. If not provided, `self.default_media_time_column` is used.
|
|
451
589
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
452
|
-
If not provided,
|
|
453
|
-
has no geo column, a national model data is assumed and a geo
|
|
454
|
-
will be created internally with a single coordinate value
|
|
590
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
591
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
592
|
+
dimension will be created internally with a single coordinate value
|
|
455
593
|
`national_geo`.
|
|
456
594
|
|
|
457
595
|
Returns:
|
|
458
596
|
The `DataFrameInputDataBuilder` with the added organic reach and organic
|
|
459
597
|
frequency data.
|
|
460
598
|
"""
|
|
599
|
+
if (
|
|
600
|
+
not organic_reach_cols
|
|
601
|
+
or not organic_frequency_cols
|
|
602
|
+
or not organic_rf_channels
|
|
603
|
+
):
|
|
604
|
+
raise ValueError(
|
|
605
|
+
'`organic_reach_cols`, `organic_frequency_cols`, and'
|
|
606
|
+
' `organic_rf_channels` must not be empty.'
|
|
607
|
+
)
|
|
608
|
+
|
|
461
609
|
organic_reach_frequency_df = df.copy()
|
|
462
610
|
|
|
611
|
+
media_time_col = media_time_col or self.default_media_time_column
|
|
612
|
+
geo_col = geo_col or self.default_geo_column
|
|
613
|
+
|
|
463
614
|
### Validate ###
|
|
464
615
|
self._validate_cols(
|
|
465
616
|
organic_reach_frequency_df,
|
|
@@ -506,8 +657,8 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
506
657
|
self,
|
|
507
658
|
df: pd.DataFrame,
|
|
508
659
|
non_media_treatment_cols: list[str],
|
|
509
|
-
time_col: str =
|
|
510
|
-
geo_col: str =
|
|
660
|
+
time_col: str | None = None,
|
|
661
|
+
geo_col: str | None = None,
|
|
511
662
|
) -> 'DataFrameInputDataBuilder':
|
|
512
663
|
"""Reads non-media treatments data from a DataFrame.
|
|
513
664
|
|
|
@@ -516,18 +667,28 @@ class DataFrameInputDataBuilder(input_data_builder.InputDataBuilder):
|
|
|
516
667
|
non_media_treatment_cols: The names of the columns containing the
|
|
517
668
|
non-media treatments values.
|
|
518
669
|
time_col: The name of the column containing the time coordinates. If not
|
|
519
|
-
provided,
|
|
670
|
+
provided, `self.default_time_column` is used.
|
|
520
671
|
geo_col: (Optional) The name of the column containing the geo coordinates.
|
|
521
|
-
If not provided,
|
|
522
|
-
has no geo column, a national model data is assumed and a geo
|
|
523
|
-
will be created internally with a single coordinate value
|
|
672
|
+
If not provided, `self.default_geo_column` is used. If the DataFrame
|
|
673
|
+
provided has no geo column, a national model data is assumed and a geo
|
|
674
|
+
dimension will be created internally with a single coordinate value
|
|
524
675
|
`national_geo`.
|
|
525
676
|
|
|
526
677
|
Returns:
|
|
527
678
|
The `DataFrameInputDataBuilder` with the added non-media treatments data.
|
|
528
679
|
"""
|
|
680
|
+
if not non_media_treatment_cols:
|
|
681
|
+
warnings.warn(
|
|
682
|
+
'No non-media treatment columns were provided. Not adding non-media '
|
|
683
|
+
'treatments data.'
|
|
684
|
+
)
|
|
685
|
+
return self
|
|
686
|
+
|
|
529
687
|
non_media_treatments_df = df.copy()
|
|
530
688
|
|
|
689
|
+
time_col = time_col or self.default_time_column
|
|
690
|
+
geo_col = geo_col or self.default_geo_column
|
|
691
|
+
|
|
531
692
|
### Validate ###
|
|
532
693
|
self._validate_cols(
|
|
533
694
|
non_media_treatments_df,
|
|
@@ -134,7 +134,9 @@ class InputDataBuilder(abc.ABC):
|
|
|
134
134
|
if len(value) != len(set(value)):
|
|
135
135
|
raise ValueError('Geos must be unique.')
|
|
136
136
|
if self.geos is not None and set(self.geos) != set(value):
|
|
137
|
-
raise ValueError(
|
|
137
|
+
raise ValueError(
|
|
138
|
+
f'geos already set to {self.geos}. Cannot reassign to {value}.'
|
|
139
|
+
)
|
|
138
140
|
self._geos = value
|
|
139
141
|
|
|
140
142
|
@property
|