py4conjoint 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py4conjoint-0.1.0/LICENSE +21 -0
- py4conjoint-0.1.0/PKG-INFO +68 -0
- py4conjoint-0.1.0/README.md +50 -0
- py4conjoint-0.1.0/pyproject.toml +28 -0
- py4conjoint-0.1.0/setup.cfg +4 -0
- py4conjoint-0.1.0/src/py4conjoint/__init__.py +399 -0
- py4conjoint-0.1.0/src/py4conjoint.egg-info/PKG-INFO +68 -0
- py4conjoint-0.1.0/src/py4conjoint.egg-info/SOURCES.txt +9 -0
- py4conjoint-0.1.0/src/py4conjoint.egg-info/dependency_links.txt +1 -0
- py4conjoint-0.1.0/src/py4conjoint.egg-info/requires.txt +1 -0
- py4conjoint-0.1.0/src/py4conjoint.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tetsugen HARUYAMA
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: py4conjoint
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Google Forms の回答CSVを評点型コンジョイント分析用のlong形式DataFrameに変換するツール
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/Py4Conjoint/py4conjoint
|
|
7
|
+
Keywords: conjoint,survey,marketing,pandas
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Intended Audience :: Education
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: pandas>=1.5
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# py4conjoint
|
|
20
|
+
|
|
21
|
+
Google Forms の回答CSVを評点型コンジョイント分析用のlong形式DataFrameに変換するPythonパッケージです。
|
|
22
|
+
|
|
23
|
+
## インストール
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install py4conjoint
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Google Colab では:
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
!pip install py4conjoint
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## 使い方
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import pandas as pd
|
|
39
|
+
import py4conjoint as pc
|
|
40
|
+
|
|
41
|
+
# カード設計(プロファイル)を作成
|
|
42
|
+
cards = pd.DataFrame({ # P1 P2 P3 P4
|
|
43
|
+
"price": [6, 10, 6, 10],
|
|
44
|
+
"os": ["android", "apple", "apple", "android"],
|
|
45
|
+
"camera": ["標準", "標準", "高性能", "高性能"]
|
|
46
|
+
}, index=["P1", "P2", "P3", "P4"])
|
|
47
|
+
|
|
48
|
+
# Google Forms の回答CSVをlong形式に変換
|
|
49
|
+
df = pc.forms_to_conjoint_data(
|
|
50
|
+
responses_csv = "responses.csv",
|
|
51
|
+
n_cards = 4,
|
|
52
|
+
attributes = cards,
|
|
53
|
+
respondent_cols= {"性別": "gender"},
|
|
54
|
+
)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## 出力形式
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
回答者ID カードID rating gender price os camera
|
|
61
|
+
0 1 P1 4 女性 6 android 標準
|
|
62
|
+
1 1 P2 3 女性 10 apple 標準
|
|
63
|
+
...
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## ライセンス
|
|
67
|
+
|
|
68
|
+
MIT
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# py4conjoint
|
|
2
|
+
|
|
3
|
+
Google Forms の回答CSVを評点型コンジョイント分析用のlong形式DataFrameに変換するPythonパッケージです。
|
|
4
|
+
|
|
5
|
+
## インストール
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install py4conjoint
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Google Colab では:
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
!pip install py4conjoint
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## 使い方
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import py4conjoint as pc
|
|
22
|
+
|
|
23
|
+
# カード設計(プロファイル)を作成
|
|
24
|
+
cards = pd.DataFrame({ # P1 P2 P3 P4
|
|
25
|
+
"price": [6, 10, 6, 10],
|
|
26
|
+
"os": ["android", "apple", "apple", "android"],
|
|
27
|
+
"camera": ["標準", "標準", "高性能", "高性能"]
|
|
28
|
+
}, index=["P1", "P2", "P3", "P4"])
|
|
29
|
+
|
|
30
|
+
# Google Forms の回答CSVをlong形式に変換
|
|
31
|
+
df = pc.forms_to_conjoint_data(
|
|
32
|
+
responses_csv = "responses.csv",
|
|
33
|
+
n_cards = 4,
|
|
34
|
+
attributes = cards,
|
|
35
|
+
respondent_cols= {"性別": "gender"},
|
|
36
|
+
)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## 出力形式
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
回答者ID カードID rating gender price os camera
|
|
43
|
+
0 1 P1 4 女性 6 android 標準
|
|
44
|
+
1 1 P2 3 女性 10 apple 標準
|
|
45
|
+
...
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## ライセンス
|
|
49
|
+
|
|
50
|
+
MIT
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "py4conjoint"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Google Forms の回答CSVを評点型コンジョイント分析用のlong形式DataFrameに変換するツール"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"pandas>=1.5",
|
|
14
|
+
]
|
|
15
|
+
keywords = ["conjoint", "survey", "marketing", "pandas"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Intended Audience :: Education",
|
|
20
|
+
"Intended Audience :: Science/Research",
|
|
21
|
+
"Topic :: Scientific/Engineering",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://github.com/Py4Conjoint/py4conjoint"
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.packages.find]
|
|
28
|
+
where = ["src"]
|
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
"""
|
|
2
|
+
py4conjoint
|
|
3
|
+
===========
|
|
4
|
+
Google Forms の回答CSVを評点型コンジョイント分析用のlong形式DataFrameに変換する。
|
|
5
|
+
|
|
6
|
+
インストール:
|
|
7
|
+
pip install py4conjoint
|
|
8
|
+
|
|
9
|
+
使い方A(cardsのDataFrameをそのまま渡す・推奨):
|
|
10
|
+
import py4conjoint as pc
|
|
11
|
+
|
|
12
|
+
cards = pd.DataFrame([
|
|
13
|
+
{"price": 6, "os": "android", "camera": "standard"}, # P1
|
|
14
|
+
{"price": 10, "os": "apple", "camera": "standard"}, # P2
|
|
15
|
+
{"price": 6, "os": "apple", "camera": "high"}, # P3
|
|
16
|
+
{"price": 10, "os": "android", "camera": "high"}, # P4
|
|
17
|
+
], index=["P1", "P2", "P3", "P4"])
|
|
18
|
+
|
|
19
|
+
df = pc.forms_to_conjoint_data(
|
|
20
|
+
responses_csv = "responses.csv",
|
|
21
|
+
n_cards = 4,
|
|
22
|
+
attributes = cards,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
使い方B(辞書のリストで渡す・従来形式):
|
|
26
|
+
import py4conjoint as pc
|
|
27
|
+
|
|
28
|
+
attributes = [
|
|
29
|
+
{"price": [6, 10, 6, 10]},
|
|
30
|
+
{"os": ["android", "apple", "apple", "android"]},
|
|
31
|
+
{"camera": ["standard", "standard", "high", "high"]},
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
df = pc.forms_to_conjoint_data(
|
|
35
|
+
responses_csv = "responses.csv",
|
|
36
|
+
n_cards = 4,
|
|
37
|
+
attributes = attributes,
|
|
38
|
+
)
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from __future__ import annotations
|
|
42
|
+
|
|
43
|
+
import re
|
|
44
|
+
import warnings
|
|
45
|
+
from pathlib import Path
|
|
46
|
+
from typing import Dict, List, Optional, Sequence
|
|
47
|
+
|
|
48
|
+
import pandas as pd
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
# 公開API
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
def forms_to_conjoint_data(
    responses_csv: str,
    n_cards: int,
    attributes: "pd.DataFrame | Sequence[Dict[str, Sequence]]",
    *,
    respondent_cols: Optional[Dict[str, str]] = None,
    card_id_prefix: str = "P",
    rating_colname: str = "rating",
    respondent_id_colname: str = "回答者ID",
    card_id_colname: str = "カードID",
    out_csv: Optional[str] = None,
) -> pd.DataFrame:
    """
    Convert a Google Forms response CSV into a long-format DataFrame.

    The result has one row per (respondent, card) pair and is meant as input
    for a rating-based conjoint analysis.

    Parameters
    ----------
    responses_csv : str
        Path to the response CSV downloaded from Google Forms. The file is
        read as UTF-8; a leading BOM is tolerated.

    n_cards : int
        Number of cards (profiles) shown in the survey, e.g. ``4``.

    attributes : pd.DataFrame or list of dict
        The card design. Two forms are accepted.

        Form A - a DataFrame (recommended). Rows are cards, columns are
        attributes. The index is not used, so ``["P1", "P2", ...]`` and a
        plain integer index both work.

            cards = pd.DataFrame([
                {"price": 6,  "os": "android", "camera": "standard"},
                {"price": 10, "os": "apple",   "camera": "standard"},
                {"price": 6,  "os": "apple",   "camera": "high"},
                {"price": 10, "os": "android", "camera": "high"},
            ], index=["P1", "P2", "P3", "P4"])

            df = pc.forms_to_conjoint_data(..., attributes=cards)

        Form B - a list of single-key dicts (legacy). Each dict maps one
        attribute name to its list of levels in card order.

            [
                {"price":  [6, 10, 6, 10]},
                {"os":     ["android", "apple", "apple", "android"]},
                {"camera": ["standard", "standard", "high", "high"]},
            ]

        In both forms the number of rows (or the length of every level
        list) must equal ``n_cards``.

    respondent_cols : dict, optional
        Respondent-level columns to keep, given as
        ``{"CSV column name": "output column name"}``, e.g.
        ``{"性別": "gender", "学年": "year"}``. When omitted, no respondent
        attributes are added.

    card_id_prefix : str, default "P"
        Prefix of the generated card IDs; ``"P"`` yields P1, P2, P3, ...

    rating_colname : str, default "rating"
        Name of the rating column in the output.

    respondent_id_colname : str, default "回答者ID"
        Name of the respondent ID column in the output. IDs are assigned
        1, 2, 3, ... in CSV row order.

    card_id_colname : str, default "カードID"
        Name of the card ID column in the output.

    out_csv : str, optional
        When given, the result is also written to this path as CSV
        (UTF-8 with BOM, without the index).

    Returns
    -------
    pd.DataFrame
        Long-format frame with the columns: respondent ID, card ID, rating,
        [respondent attributes], [card attributes]. Rows are ordered by
        respondent ID, then by card position (P1, P2, ..., P10, ...).

    Raises
    ------
    FileNotFoundError
        If ``responses_csv`` does not exist.
    ValueError
        If the number of rows of ``attributes`` (or the length of a level
        list) differs from ``n_cards``, or if ``n_cards`` rating columns
        cannot be found in the CSV.
    """
    # ------------------------------------------------------------------
    # 0. Input checks.
    #    A DataFrame card design is first normalised to the internal
    #    list-of-single-key-dicts form.
    # ------------------------------------------------------------------
    attributes = _normalize_attributes(attributes, n_cards)
    _check_attributes(attributes, n_cards)

    csv_path = Path(responses_csv)
    if not csv_path.exists():
        raise FileNotFoundError(
            f"CSVファイルが見つかりません: {responses_csv}\n"
            "ファイル名とパスを確認してください。"
        )

    # ------------------------------------------------------------------
    # 1. Read the CSV. "utf-8-sig" reads plain UTF-8 and also strips a BOM.
    # ------------------------------------------------------------------
    raw = pd.read_csv(csv_path, encoding="utf-8-sig")

    # ------------------------------------------------------------------
    # 2. Set aside system columns and respondent columns; what is left
    #    are the candidates for rating columns.
    # ------------------------------------------------------------------
    forms_system_cols = _detect_forms_system_cols(raw)

    respondent_rename: Dict[str, str] = respondent_cols or {}
    respondent_src_cols = list(respondent_rename.keys())

    non_rating_cols = set(forms_system_cols) | set(respondent_src_cols)
    rating_candidate_cols = [c for c in raw.columns if c not in non_rating_cols]

    rating_cols = _pick_rating_cols(rating_candidate_cols, raw, n_cards, responses_csv)

    # ------------------------------------------------------------------
    # 3. Assign respondent IDs (1-based, in CSV row order).
    # ------------------------------------------------------------------
    n_respondents = len(raw)
    raw[respondent_id_colname] = range(1, n_respondents + 1)

    # ------------------------------------------------------------------
    # 4. Select and rename the respondent attribute columns.
    # ------------------------------------------------------------------
    keep_cols = [respondent_id_colname] + respondent_src_cols + rating_cols
    df_wide = raw[keep_cols].copy()

    if respondent_rename:
        df_wide = df_wide.rename(columns=respondent_rename)
        respondent_dst_cols = list(respondent_rename.values())
    else:
        respondent_dst_cols = []

    # Rename the rating columns to card IDs so the melt below yields the
    # card ID directly as the variable column.
    card_ids = [f"{card_id_prefix}{i+1}" for i in range(n_cards)]
    rating_rename = dict(zip(rating_cols, card_ids))
    df_wide = df_wide.rename(columns=rating_rename)

    # ------------------------------------------------------------------
    # 5. wide -> long
    # ------------------------------------------------------------------
    id_vars = [respondent_id_colname] + respondent_dst_cols
    df_long = df_wide.melt(
        id_vars=id_vars,
        value_vars=card_ids,
        var_name=card_id_colname,
        value_name=rating_colname,
    )
    # Sort cards by their position in the design rather than by the ID
    # string: a plain string sort would put "P10" before "P2" as soon as
    # there are ten or more cards.
    card_position = {card_id: pos for pos, card_id in enumerate(card_ids)}
    df_long = df_long.sort_values(
        [respondent_id_colname, card_id_colname],
        key=lambda col: (
            col.map(card_position) if col.name == card_id_colname else col
        ),
    )
    df_long = df_long.reset_index(drop=True)

    # ------------------------------------------------------------------
    # 6. Attach the card design (attributes and levels).
    # ------------------------------------------------------------------
    card_design = _build_card_design(card_ids, attributes, card_id_colname)
    df_long = df_long.merge(card_design, on=card_id_colname)

    # ------------------------------------------------------------------
    # 7. Column order: respondent ID, card ID, rating,
    #    respondent attributes, card attributes.
    # ------------------------------------------------------------------
    attr_names = [list(a.keys())[0] for a in attributes]
    col_order = (
        [respondent_id_colname, card_id_colname, rating_colname]
        + respondent_dst_cols
        + attr_names
    )
    df_long = df_long[col_order]

    # ------------------------------------------------------------------
    # 8. Optional CSV export.
    # ------------------------------------------------------------------
    if out_csv is not None:
        df_long.to_csv(out_csv, index=False, encoding="utf-8-sig")
        print(f"保存しました: {out_csv}")

    return df_long
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# ---------------------------------------------------------------------------
|
|
244
|
+
# 内部ヘルパー関数
|
|
245
|
+
# ---------------------------------------------------------------------------
|
|
246
|
+
|
|
247
|
+
# Header patterns of the bookkeeping columns (timestamps, e-mail, name, id)
# that form tools add automatically. Each pattern is matched against the
# stripped, lower-cased column header.
_FORMS_SYSTEM_PATTERNS = [
    r"^timestamp$",
    # NOTE(review): Japanese-locale Google Forms exports are believed to label
    # the timestamp column with this header - confirm against a real export.
    r"^タイムスタンプ$",
    r"^開始時刻$",
    r"^完了時刻$",
    r"^最終変更時刻$",
    r"^メール(アドレス)?$",
    r"^名前$",
    # Also accept "Email Address" (presumably the English Google Forms header).
    r"^email(\s*address)?$",
    r"^start\s*time$",
    r"^completion\s*time$",
    r"^last\s*modified\s*time$",
    r"^id$",
]


def _detect_forms_system_cols(df: pd.DataFrame) -> List[str]:
    """Return the columns of *df* whose header matches a system-column pattern.

    Headers are stripped and lower-cased before matching, and matching is
    case-insensitive. The original column labels are returned, in column
    order.
    """
    system = []
    for col in df.columns:
        # str() keeps non-string labels (e.g. integers) from raising here.
        header = str(col).strip().lower()
        if any(
            re.match(pattern, header, re.IGNORECASE)
            for pattern in _FORMS_SYSTEM_PATTERNS
        ):
            system.append(col)
    return system
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _pick_rating_cols(
    candidates: List[str],
    df: pd.DataFrame,
    n_cards: int,
    csv_path: str,
) -> List[str]:
    """
    Choose ``n_cards`` rating columns out of ``candidates``.

    Selection order:
      1. If at least ``n_cards`` candidates are numeric (numeric dtype, or
         convertible by ``_is_coercible_to_numeric``), return the rightmost
         ``n_cards`` of those.
      2. Otherwise, if there are at least ``n_cards`` candidates overall,
         take the rightmost ``n_cards`` and require each to be convertible
         to numbers; the first one that is not raises ``ValueError``.
      3. Otherwise raise ``ValueError`` because there are too few candidates.

    ``csv_path`` is only used in the error messages.
    """
    def _looks_numeric(name):
        column = df[name]
        return (
            pd.api.types.is_numeric_dtype(column)
            or _is_coercible_to_numeric(column)
        )

    usable = [name for name in candidates if _looks_numeric(name)]

    # Rightmost columns win: the form exports questions in question order.
    if len(usable) >= n_cards:
        return usable[-n_cards:]

    if len(candidates) < n_cards:
        raise ValueError(
            f"評点列が {n_cards} 列分見つかりませんでした。\n"
            f"評点列の候補: {candidates}\n"
            f"n_cards={n_cards} に対して候補が {len(candidates)} 列しかありません。\n"
            f"CSVの列構造を確認してください: {csv_path}"
        )

    # Fallback: rightmost n_cards of all candidates, each checked in turn.
    tail = candidates[-n_cards:]
    for name in tail:
        if _is_coercible_to_numeric(df[name]):
            continue
        raise ValueError(
            f"評点列の自動検出に失敗しました。\n"
            f"列 '{name}' を数値に変換できません。\n"
            f"CSVの列構造を確認してください: {csv_path}"
        )
    return tail
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _is_coercible_to_numeric(series: pd.Series) -> bool:
    """Return True when at least one value of *series* converts to a number.

    Values are converted with ``pd.to_numeric(errors="coerce")``, so
    unparseable entries become NaN; the series counts as convertible as soon
    as one non-NaN value remains. An empty or all-unparseable series gives
    False.
    """
    converted = pd.to_numeric(series, errors="coerce")
    # bool() so callers get a plain Python bool, as the annotation promises,
    # rather than whatever scalar type Series.any() returns.
    return bool(converted.notna().any())
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _build_card_design(
    card_ids: List[str],
    attributes: Sequence[Dict[str, Sequence]],
    card_id_colname: str,
) -> pd.DataFrame:
    """Build the lookup table that maps each card ID to its attribute levels.

    The first column (named ``card_id_colname``) holds ``card_ids``; one
    further column is added per entry of ``attributes``, named after the
    first key of that dict and filled with its levels.
    """
    columns: Dict[str, list] = {card_id_colname: card_ids}
    for spec in attributes:
        # Only the first key of each dict is used.
        name = list(spec)[0]
        columns[name] = list(spec[name])
    design = pd.DataFrame(columns)
    return design
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _normalize_attributes(
    attributes: "pd.DataFrame | Sequence[Dict[str, Sequence]]",
    n_cards: int,
) -> "List[Dict[str, list]]":
    """
    Bring ``attributes`` into the internal list-of-dicts form.

    - Anything that is not a DataFrame is returned as a new list of its
      elements, without further checks.
    - A DataFrame must have exactly ``n_cards`` rows (otherwise
      ``ValueError``); each column becomes one ``{column name: values}``
      dict, in column order.
    """
    if not isinstance(attributes, pd.DataFrame):
        # Already a sequence of dicts: just copy it into a list.
        return list(attributes)

    n_rows = len(attributes)
    if n_rows != n_cards:
        raise ValueError(
            f"cards(attributes)の行数 ({n_rows}) が "
            f"n_cards ({n_cards}) と一致しません。\n"
            f"cards の行数と n_cards を同じ値にしてください。"
        )

    normalized = []
    for col in attributes.columns:
        normalized.append({col: list(attributes[col])})
    return normalized
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _check_attributes(
    attributes: "List[Dict[str, list]]",
    n_cards: int,
) -> None:
    """Validate the structure of ``attributes`` and the length of each level list.

    Raises ``ValueError`` when ``attributes`` is empty, when an entry is not
    a dict with exactly one key, or when a level list does not have
    ``n_cards`` entries. Emits a ``UserWarning`` when only a single attribute
    is given.
    """
    if not attributes:
        raise ValueError("attributes が空です。少なくとも1つの属性を指定してください。")

    if len(attributes) == 1:
        warnings.warn(
            "属性が1つしかありません。\n"
            "属性が1つの場合、複数属性間のトレードオフが測れないため、\n"
            "支払意思額(WTP)の計算ができません。\n"
            "コンジョイント分析の導入として使う場合は問題ありませんが、\n"
            "本分析では属性を2つ以上にすることを推奨します。",
            UserWarning,
            # Frames: 1 = this helper, 2 = forms_to_conjoint_data,
            # 3 = the user's call site, which is where the warning belongs.
            stacklevel=3,
        )

    for i, attr_dict in enumerate(attributes):
        if not isinstance(attr_dict, dict) or len(attr_dict) != 1:
            raise ValueError(
                f"attributes[{i}] は キー1つの辞書である必要があります。\n"
                f"例:{{\"wage\": [1000, 1300, 1000, 1300]}}\n"
                f"実際の値:{attr_dict}"
            )
        attr_name, levels = list(attr_dict.items())[0]
        if len(levels) != n_cards:
            raise ValueError(
                f"属性 '{attr_name}' の水準リストの長さ ({len(levels)}) が "
                f"n_cards ({n_cards}) と一致しません。\n"
                f"水準リスト: {list(levels)}"
            )
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: py4conjoint
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Google Forms の回答CSVを評点型コンジョイント分析用のlong形式DataFrameに変換するツール
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/Py4Conjoint/py4conjoint
|
|
7
|
+
Keywords: conjoint,survey,marketing,pandas
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Intended Audience :: Education
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: pandas>=1.5
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# py4conjoint
|
|
20
|
+
|
|
21
|
+
Google Forms の回答CSVを評点型コンジョイント分析用のlong形式DataFrameに変換するPythonパッケージです。
|
|
22
|
+
|
|
23
|
+
## インストール
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install py4conjoint
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Google Colab では:
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
!pip install py4conjoint
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## 使い方
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import pandas as pd
|
|
39
|
+
import py4conjoint as pc
|
|
40
|
+
|
|
41
|
+
# カード設計(プロファイル)を作成
|
|
42
|
+
cards = pd.DataFrame({ # P1 P2 P3 P4
|
|
43
|
+
"price": [6, 10, 6, 10],
|
|
44
|
+
"os": ["android", "apple", "apple", "android"],
|
|
45
|
+
"camera": ["標準", "標準", "高性能", "高性能"]
|
|
46
|
+
}, index=["P1", "P2", "P3", "P4"])
|
|
47
|
+
|
|
48
|
+
# Google Forms の回答CSVをlong形式に変換
|
|
49
|
+
df = pc.forms_to_conjoint_data(
|
|
50
|
+
responses_csv = "responses.csv",
|
|
51
|
+
n_cards = 4,
|
|
52
|
+
attributes = cards,
|
|
53
|
+
respondent_cols= {"性別": "gender"},
|
|
54
|
+
)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## 出力形式
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
回答者ID カードID rating gender price os camera
|
|
61
|
+
0 1 P1 4 女性 6 android 標準
|
|
62
|
+
1 1 P2 3 女性 10 apple 標準
|
|
63
|
+
...
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## ライセンス
|
|
67
|
+
|
|
68
|
+
MIT
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/py4conjoint/__init__.py
|
|
5
|
+
src/py4conjoint.egg-info/PKG-INFO
|
|
6
|
+
src/py4conjoint.egg-info/SOURCES.txt
|
|
7
|
+
src/py4conjoint.egg-info/dependency_links.txt
|
|
8
|
+
src/py4conjoint.egg-info/requires.txt
|
|
9
|
+
src/py4conjoint.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pandas>=1.5
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
py4conjoint
|