litescrape 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # PyPI configuration file
210
+ .pypirc
211
+
212
+ # Marimo
213
+ marimo/_static/
214
+ marimo/_lsp/
215
+ __marimo__/
216
+
217
+ # Streamlit
218
+ .streamlit/secrets.toml
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nishizawa Takamasa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,327 @@
1
+ Metadata-Version: 2.4
2
+ Name: litescrape
3
+ Version: 0.1.1
4
+ Summary: 自分用・非汎用
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: patchright>=1.40
9
+ Requires-Dist: playwright>=1.40
10
+ Requires-Dist: selectolax>=0.3
11
+ Requires-Dist: pyarrow>=23.0
12
+ Requires-Dist: camoufox>=0.4
13
+ Requires-Dist: loguru>=0.7
14
+ Requires-Dist: tqdm>=4.66
15
+
16
+ # LiteScrape
17
+
18
+ 自分用・非汎用
19
+
20
+ ## インストール
21
+ `uv add litescrape`
22
+
23
+ ※ `run_patchright` を使うとき:Google ChromeをPCにインストールしておく。
24
+ ※ `run_camoufox` を使うとき:`uv run camoufox fetch`
25
+
26
+ ## 実装機能
27
+
28
+ ### litescrape
29
+
30
+ - `LitePage`
31
+ - `LiteElement`
32
+ - `LiteElementGroup`
33
+ - `ElementScan`
34
+ - `LiteFrame`
35
+ - `LiteShadowRoot`
36
+ - `LiteParser`
37
+ - `LiteNode`
38
+ - `LiteNodeGroup`
39
+ - `NodeScan`
40
+
41
+ ### litescrape.utils
42
+
43
+ - `parse_html(path: Path) -> LexborHTMLParser | None`
44
+ - `meta_html(meta: Mapping[str, object | None]) -> str`
45
+ - `from_here(file: str) -> Callable[[str], Path]`
46
+ - `append_csv(path: Path, row: dict) -> None`
47
+ - `write_csv(path: Path, rows: list[dict]) -> None`
48
+ - `write_parquet(path: Path, rows: list[dict]) -> None`
49
+ - `hash_name(key: str) -> str`
50
+ - `write_text(path: Path, data: str) -> bool`
51
+ - `write_bytes(path: Path, data: bytes) -> bool`
52
+ - `save_log(path: Path, level: str = 'WARNING') -> None`
53
+ - `process_map[T, R](worker: Callable[[T], R], items: Iterable[T], workers: int | None = None, *, chunksize: int | None = None) -> list[R | None]`
54
+ - `glob_paths(dir_path: Path, pattern: str = '*.html') -> list[str]`
55
+ - `counter(start: int = 1) -> Iterator[int]`
56
+
57
+ ### litescrape.browser
58
+
59
+ - `Span`
60
+ - `run_patchright(*, browser: dict | None = None, context: dict | None = None, span: Span | None = None) -> PatchrightRunner`
61
+ - `run_camoufox(*, browser: dict | None = None, context: dict | None = None, span: Span | None = None) -> CamoufoxRunner`
62
+ - `PatchrightRunner.page() -> Page` / `CamoufoxRunner.page() -> Page`
63
+
64
+ `browser` / `context` は Playwright へ渡す起動オプション。`span` は litescrape の再生成間隔(`page()` 呼び出し回数ごとに独立して効く。省略時は再生成しない)。`page()` を呼ぶたびに内部カウントが 1 進む。
65
+
66
+ ## 使用例
67
+
68
+ ### crawl.py
69
+ ```python
70
+ from urllib.parse import urlencode
71
+
72
+ from litescrape import lite_page
73
+ from litescrape.browser import Span, run_patchright
74
+ from litescrape.utils import save_log, from_here, counter, write_csv
75
+
76
+ here = from_here(__file__)
77
+ save_log(here('log/crawling.log'))
78
+
79
+ with run_patchright(
80
+ browser={'channel': 'chrome', 'headless': False},
81
+ context={'viewport': {'width': 1920, 'height': 1080}},
82
+ span=Span(browser=300, context=100, page=20),
83
+ ) as s:
84
+ page = s.page()
85
+ p = lite_page(page)
86
+ p.goto('https://home.katitas.jp/buyers_search')
87
+ prefecture_urls = p.ii('div ul li a[href^="https://home.katitas.jp/buyers_search/area"]').urls
88
+
89
+ n = len(prefecture_urls)
90
+ urls = []
91
+ for i, prefecture_url in enumerate(prefecture_urls):
92
+ print(f'prefecture_url {i}/{n - 1}')
93
+ for page_num in counter():
94
+ page = s.page()
95
+ p = lite_page(page)
96
+ if not p.goto(f'{prefecture_url}?{urlencode({"page": page_num})}', sleep_after=(0.5, 1)):
97
+ break
98
+ if not (bukken_elems := p.ii('ul li div a[href^="https://home.katitas.jp"]:has(p)')):
99
+ break
100
+ urls.extend(bukken_elems.urls)
101
+ write_csv(here('csv/urls.csv'), [{'url': url} for url in urls])
102
+ ```
103
+
104
+ ### scrape.py
105
+ ```python
106
+ from datetime import datetime, timezone
107
+ import time
108
+
109
+ import pandas as pd
110
+
111
+ from litescrape import lite_page
112
+ from litescrape.browser import Span, run_patchright
113
+ from litescrape.utils import (
114
+ save_log,
115
+ append_csv,
116
+ from_here,
117
+ meta_html,
118
+ hash_name,
119
+ write_text,
120
+ write_bytes,
121
+ )
122
+
123
+ here = from_here(__file__)
124
+ save_log(here('log/scraping.log'))
125
+
126
+ items = list(pd.read_csv(here('csv/urls.csv'))['url'].items())
127
+ n = len(items)
128
+
129
+ with run_patchright(
130
+ browser={'channel': 'chrome', 'headless': False},
131
+ context={'viewport': {'width': 1920, 'height': 1080}},
132
+ span=Span(browser=300, context=100),
133
+ ) as s:
134
+ for url_index, request_url in items:
135
+ print(f'url_index {url_index}/{n - 1}')
136
+ page = s.page()
137
+ p = lite_page(page)
138
+ if not p.goto(request_url):
139
+ append_csv(here('csv/failed.csv'), {
140
+ 'url_index': url_index,
141
+ 'request_url': request_url,
142
+ 'reason': 'goto',
143
+ })
144
+ continue
145
+ html = meta_html({
146
+ 'litescrape:url_index': url_index,
147
+ 'litescrape:saved_at': datetime.now(timezone.utc),
148
+ 'litescrape:request_url': request_url,
149
+ 'litescrape:final_url': page.url,
150
+ }) + page.content()
151
+ if not write_text(here('html') / f'{hash_name(page.url)}.html', html):
152
+ append_csv(here('csv/failed.csv'), {
153
+ 'url_index': url_index,
154
+ 'request_url': request_url,
155
+ 'reason': 'write_text',
156
+ })
157
+
158
+ page.screenshot(path=here(f'media/{url_index}-full-page.png'), full_page=True)
159
+
160
+ elem_iframe = p.i('iframe[src^="https://home.katitas.jp"]')
161
+ elem_iframe.scroll_into_view()
162
+ time.sleep(3)
163
+ elem_iframe.screenshot(here(f'media/{url_index}-gmap.png'), isolate=True)
164
+
165
+ img_li_scan = p.ii('p.text-left').scan.m(r'画像をクリックすると拡大画像がご覧に').n('ul').ii('li').scan
166
+ img_li = img_li_scan.m(r'外観') or img_li_scan.m(r'^(?!.*間取).*')
167
+ img_url = img_li.i('a').url
168
+ if (body := p.bytes_at(img_url)):
169
+ write_bytes(here(f'media/{url_index}-img-desc.jpg'), body)
170
+
171
+ main_img_url = p.i('img.w-full.object-contain').src
172
+ if (body := p.bytes_at(main_img_url)):
173
+ write_bytes(here(f'media/{url_index}-img-main.jpg'), body)
174
+ ```
175
+
176
+ ### extract.py
177
+ ```python
178
+ from pathlib import Path
179
+
180
+ from litescrape import lite_parser
181
+ from litescrape.utils import from_here, glob_paths, parse_html, process_map, write_parquet
182
+
183
+ def main():
184
+ here = from_here(__file__)
185
+ html_paths = glob_paths(here('html'), '*.html')
186
+ results = [r for r in process_map(extract, html_paths) if r]
187
+ write_parquet(here('parquet/extract.parquet'), results)
188
+
189
+ def extract(file_path: str) -> dict | None:
190
+ if not (parser := parse_html(Path(file_path))):
191
+ return None
192
+ p = lite_parser(parser)
193
+ dt_scan = p.ii('dt').scan
194
+ dd_text = lambda pattern: dt_scan.m(pattern).n('dd').text
195
+ return {
196
+ 'url_index': p.i('meta[name="litescrape:url_index"]').attr('content'),
197
+ 'saved_at': p.i('meta[name="litescrape:saved_at"]').attr('content'),
198
+ 'request_url': p.i('meta[name="litescrape:request_url"]').attr('content'),
199
+ 'final_url': p.i('meta[name="litescrape:final_url"]').attr('content'),
200
+ 'ファイル名': Path(file_path).name,
201
+
202
+ '取り扱い店舗': p.ii('p').scan.m(r'取り扱い店舗').n('p').text,
203
+
204
+ '価格': dd_text(r'価格'),
205
+ '月々の支払い': dd_text(r'月々の支払い'),
206
+ '間取': dd_text(r'間取'),
207
+ '土地面積': dd_text(r'土地面積'),
208
+ '建物面積': dd_text(r'建物面積'),
209
+
210
+ '所在地': dd_text(r'所在地'),
211
+ '交通': dd_text(r'交通'),
212
+ '接道状況': dd_text(r'接道状況'),
213
+ '私道面積': dd_text(r'私道面積'),
214
+ 'セットバック': dd_text(r'セットバック'),
215
+ '建物構造': dd_text(r'建物構造'),
216
+ '国土法提出': dd_text(r'国土法提出'),
217
+ '駐車場': dd_text(r'駐車場'),
218
+ '車庫区分': dd_text(r'車庫区分'),
219
+ '都市計画': dd_text(r'都市計画'),
220
+ '物件種別': dd_text(r'物件種別'),
221
+ '建ぺい率 /容積率': dd_text(r'建ぺい率.*容積率'),
222
+ '土地権利': dd_text(r'土地権利'),
223
+ '地目': dd_text(r'地目'),
224
+ '築年月': dd_text(r'築年月'),
225
+ '取引態様': dd_text(r'取引態様'),
226
+ '引渡日(入居予定日)': dd_text(r'引渡日.*入居予定日'),
227
+ '用途地域': dd_text(r'用途地域'),
228
+ '現況': dd_text(r'現況'),
229
+ '設備・条件': dd_text(r'設備.*条件'),
230
+ '備考': dd_text(r'備考'),
231
+ '最寄りの学校': dd_text(r'最寄.*の学校'),
232
+ '物件番号': dd_text(r'物件番号'),
233
+ '情報更新日': dd_text(r'情報更新日'),
234
+ '次回更新予定日': dd_text(r'次回更新予定日'),
235
+
236
+ 'スタッフからのコメント': p.ii('div').scan.m(r'スタッフからのコメント').n('div').text,
237
+ '物件の魅力': p.ii('p').scan.m(r'物件の魅力').n('p').text,
238
+
239
+ 'img_desc': '\n'.join(p.ii('p.text-left').scan.m(r'画像をクリックすると拡大画像がご覧に').n('ul').ii('li').texts)
240
+ }
241
+
242
+ if __name__ == '__main__':
243
+ main()
244
+ ```
245
+
246
+ ### clean.ipynb
247
+ ```python
248
+ import re
249
+
250
+ import pandas as pd
251
+ ```
252
+ ```python
253
+ df_shikutyoson = pd.read_csv('./shikutyoson.csv')
254
+ cities = df_shikutyoson["市区町村"].dropna().sort_values(key=lambda x: x.str.len(), ascending=False)
255
+ shikutyoson_pattern = "|".join(cities.map(lambda x: re.escape(x)))
256
+ ```
257
+ ```python
258
+ df_raw = pd.read_parquet('parquet/extract.parquet')
259
+ df_raw = df_raw.apply(lambda x: x.fillna('').str.normalize('NFKC').str.strip())
260
+ ```
261
+ ```python
262
+ df = df_raw.sort_values('saved_at')[['url_index', 'saved_at', 'request_url', 'final_url']].copy()
263
+
264
+ df['事例種別'] = df_raw['物件種別'].str.contains(r'中古|土地').map({True: '中古売出'})
265
+ df['総額'] = (
266
+ df_raw['価格']
267
+ .str.extract(r'([,\d]+)\s*万円', expand=False)
268
+ .replace(',', '', regex=True)
269
+ .pipe(lambda s: pd.to_numeric(s, errors='coerce') * 10000)
270
+ )
271
+ df['土地面積'] = df_raw['土地面積'].str.extract(r'([\d\.]+)')
272
+ df['建物面積'] = df_raw['建物面積'].str.extract(r'([\d\.]+)')
273
+ df['建物種別'] = df_raw['物件種別'].map({'中古戸建': '戸建て', '中古マンション': 'マンション', '土地': '土地'})
274
+ df[['所在都道府県', '所在市', '所在字', '所在番地']] = df_raw['所在地'].str.extract(fr'^(京都府|.+?[都道府県])({shikutyoson_pattern})(\D*)(.*)')
275
+
276
+ s1 = (
277
+ df_raw['築年月']
278
+ .replace({r'元年': r'1年'}, regex=True)
279
+ .str.extract(r'(\d+)年', expand=False)
280
+ .pipe(lambda s: pd.to_numeric(s, errors='coerce'))
281
+ )
282
+ s2 = df_raw['築年月'].str[:2].map({'令和': 2018, '平成': 1988, '昭和': 1925, '大正': 1911, '明治': 1867})
283
+ df['建築年'] = s1 + s2
284
+
285
+ df['構造体'] = df_raw['建物構造'].str.extract(r'^(\S+)')
286
+ df['階層'] = df_raw['建物構造'].str.extract(r'(\d+)階')
287
+ df['リノベ内容'] = df_raw['備考'].str.extract(r'(?s)^(20\d{2}/.*?)\n\D')
288
+ df['間取'] = df_raw['間取']
289
+ df['成約年月'] = df_raw['現況'].map({'空': '販売中', '古家付': '販売中'})
290
+ df['私道負担'] = df_raw['私道面積']
291
+ df['接道'] = df_raw['接道状況']
292
+
293
+ s1 = df_raw['最寄りの学校'].str.extract(r'([^/\s【】・、(]+?小学校)', expand=False)
294
+ s2 = df_raw['物件の魅力'].str.extract(r'([^/\s【】・、(]+?小学校)', expand=False)
295
+ s3 = df_raw['備考'].str.extract(r'([^/\s【】・、(]+?小学校)', expand=False)
296
+ s4 = df_raw['img_desc'].str.extract(r'([^/\s【】・、(]+?小学校)', expand=False)
297
+ df['小学校'] = s1.fillna(s2).fillna(s3).fillna(s4)
298
+
299
+ s1 = df_raw['最寄りの学校'].str.extract(r'([^/\s【】・、(]+?中学校)', expand=False)
300
+ s2 = df_raw['物件の魅力'].str.extract(r'([^/\s【】・、(]+?中学校)', expand=False)
301
+ s3 = df_raw['備考'].str.extract(r'([^/\s【】・、(]+?中学校)', expand=False)
302
+ s4 = df_raw['img_desc'].str.extract(r'([^/\s【】・、(]+?中学校)', expand=False)
303
+ df['中学校'] = s1.fillna(s2).fillna(s3).fillna(s4)
304
+
305
+ df['周辺環境'] = df_raw['備考'].map(lambda x: '\n'.join(l for l in x.splitlines() if re.search(r'(?:\d分|\dm)$', l)))
306
+ df['都市計画'] = df_raw['都市計画']
307
+ df['用途地域'] = df_raw['用途地域']
308
+ df[['建ぺい率', '容積率']] = df_raw['建ぺい率 /容積率'].str.extract(r'(\d+%)\D*(\d+%)')
309
+ df['水道'] = df_raw['設備・条件'].str.extract(r'(公営水道|上水道)')
310
+ df['下水'] = df_raw['設備・条件'].str.extract(r'(本下水|個別浄化槽|汲取|下水道)')
311
+ df['ガス'] = df_raw['設備・条件'].str.extract(r'(個別LPG|集中LPG|都市ガス|プロパンガス|オール電化)')
312
+ df['契約態様'] = df_raw['取引態様']
313
+ df['問合せ先'] = df_raw['取り扱い店舗']
314
+ df['駐車場'] = df_raw['駐車場']
315
+ df['交通'] = df_raw['交通']
316
+ df['物件の特徴'] = df_raw['物件の魅力']
317
+ df['仕様'] = df_raw['設備・条件']
318
+
319
+ df['土地権利'] = df_raw['土地権利']
320
+ df['地目'] = df_raw['地目']
321
+ df['引渡日(入居予定日)'] = df_raw['引渡日(入居予定日)']
322
+ df['物件番号'] = df_raw['物件番号']
323
+ df['情報更新日'] = df_raw['情報更新日']
324
+ ```
325
+ ```python
326
+ df.to_clipboard(index=False)
327
+ ```